{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cee7b796",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0ee7f481",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load adult.csv into the notebook-wide DataFrame `df`.\n",
    "# NOTE(review): this is an absolute, machine-specific Windows path, so the notebook will not run on another machine - consider a configurable data directory.\n",
    "df = pd.read_csv('C:\\\\Users\\\\96408\\\\Desktop\\\\adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "67efc6e3-1b26-4a1e-a219-a0d0b8b553ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every row that contains at least one NaN value.\n",
    "# axis/how are passed by keyword: positional use was deprecated in pandas 1.x and raises TypeError in pandas 2.0.\n",
    "df.dropna(axis=0, how='any', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b23a9bfb-8852-4fad-9ae1-72858a38a9c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>83311.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>215646.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>234721.0</td>\n",
       "      <td>11th</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>338409.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>Cuba</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age          workclass    fnlwgt   education  education-num  \\\n",
       "0  39.0          State-gov   77516.0   Bachelors           13.0   \n",
       "1  50.0   Self-emp-not-inc   83311.0   Bachelors           13.0   \n",
       "2  38.0            Private  215646.0     HS-grad            9.0   \n",
       "3  53.0            Private  234721.0        11th            7.0   \n",
       "4  28.0            Private  338409.0   Bachelors           13.0   \n",
       "\n",
       "        marital-status          occupation    relationship    race      sex  \\\n",
       "0        Never-married        Adm-clerical   Not-in-family   White     Male   \n",
       "1   Married-civ-spouse     Exec-managerial         Husband   White     Male   \n",
       "2             Divorced   Handlers-cleaners   Not-in-family   White     Male   \n",
       "3   Married-civ-spouse   Handlers-cleaners         Husband   Black     Male   \n",
       "4   Married-civ-spouse      Prof-specialty            Wife   Black   Female   \n",
       "\n",
       "   capital-gain  capital-loss  hours-per-week  native-country  \\\n",
       "0        2174.0           0.0            40.0   United-States   \n",
       "1           0.0           0.0            13.0   United-States   \n",
       "2           0.0           0.0            40.0   United-States   \n",
       "3           0.0           0.0            40.0   United-States   \n",
       "4           0.0           0.0            40.0            Cuba   \n",
       "\n",
       "  Listing of attributes  \n",
       "0                 <=50K  \n",
       "1                 <=50K  \n",
       "2                 <=50K  \n",
       "3                 <=50K  \n",
       "4                 <=50K  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the frame.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4a7c1d38",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39. 50. 38. 53. 28. 37. 49. 52. 31. 42. 30. 23. 32. 40. 34. 25. 43. 54.\n",
      " 35. 59. 56. 19. 20. 45. 22. 48. 21. 24. 57. 44. 41. 29. 18. 47. 46. 36.\n",
      " 79. 27. 67. 33. 76. 17. 55. 61. 70. 64. 71. 68. 66. 51. 58. 26. 60. 90.\n",
      " 75. 65. 77. 62. 63. 80. 72. 74. 69. 73. 81. 78. 88. 82. 83. 84. 85. 86.\n",
      " 87.]\n",
      "[' State-gov' ' Self-emp-not-inc' ' Private' ' Federal-gov' ' Local-gov'\n",
      " ' ?' ' Self-emp-inc' ' Without-pay' ' Never-worked']\n",
      "[ 77516.  83311. 215646. ...  34066.  84661. 257302.]\n",
      "[' Bachelors' ' HS-grad' ' 11th' ' Masters' ' 9th' ' Some-college'\n",
      " ' Assoc-acdm' ' Assoc-voc' ' 7th-8th' ' Doctorate' ' Prof-school'\n",
      " ' 5th-6th' ' 10th' ' 1st-4th' ' Preschool' ' 12th']\n",
      "[13.  9.  7. 14.  5. 10. 12. 11.  4. 16. 15.  3.  6.  2.  1.  8.]\n",
      "[' Never-married' ' Married-civ-spouse' ' Divorced'\n",
      " ' Married-spouse-absent' ' Separated' ' Married-AF-spouse' ' Widowed']\n",
      "[' Adm-clerical' ' Exec-managerial' ' Handlers-cleaners' ' Prof-specialty'\n",
      " ' Other-service' ' Sales' ' Craft-repair' ' Transport-moving'\n",
      " ' Farming-fishing' ' Machine-op-inspct' ' Tech-support' ' ?'\n",
      " ' Protective-serv' ' Armed-Forces' ' Priv-house-serv']\n",
      "[' Not-in-family' ' Husband' ' Wife' ' Own-child' ' Unmarried'\n",
      " ' Other-relative']\n",
      "[' White' ' Black' ' Asian-Pac-Islander' ' Amer-Indian-Eskimo' ' Other']\n",
      "[' Male' ' Female']\n",
      "[ 2174.     0. 14084.  5178.  5013.  2407. 14344. 15024.  7688. 34095.\n",
      "  4064.  4386.  7298.  1409.  3674.  1055.  3464.  2050.  2176.   594.\n",
      " 20051.  6849.  4101.  1111.  8614.  3411.  2597. 25236.  4650.  9386.\n",
      "  2463.  3103. 10605.  2964.  3325.  2580.  3471.  4865. 99999.  6514.\n",
      "  1471.  2329.  2105.  2885. 25124. 10520.  2202.  2961. 27828.  6767.\n",
      "  2228.  1506. 13550.  2635.  5556.  4787.  3781.  3137.  3818.  3942.\n",
      "   914.   401.  2829.  2977.  4934.  2062.  2354.  5455. 15020.  1424.\n",
      "  3273. 22040.  4416.  3908. 10566.   991.  4931.  1086.  7430.  6497.\n",
      "   114.  7896.  2346.  3418.  3432.  2907.  1151.  2414.  2290. 15831.\n",
      " 41310.  4508.  2538.  3456.  6418.  1848.  3887.  5721.  9562.  1455.\n",
      "  2036.  1831. 11678.  2936.  2993.  7443.  6360.  1797.  1173.  4687.\n",
      "  6723.  2009.  6097.  2653.  1639. 18481.  7978.  2387.  5060.]\n",
      "[   0. 2042. 1408. 1902. 1573. 1887. 1719. 1762. 1564. 2179. 1816. 1980.\n",
      " 1977. 1876. 1340. 2206. 1741. 1485. 2339. 2415. 1380. 1721. 2051. 2377.\n",
      " 1669. 2352. 1672.  653. 2392. 1504. 2001. 1590. 1651. 1628. 1848. 1740.\n",
      " 2002. 1579. 2258. 1602.  419. 2547. 2174. 2205. 1726. 2444. 1138. 2238.\n",
      "  625.  213. 1539.  880. 1668. 1092. 1594. 3004. 2231. 1844.  810. 2824.\n",
      " 2559. 2057. 1974.  974. 2149. 1825. 1735. 1258. 2129. 2603. 2282.  323.\n",
      " 4356. 2246. 1617. 1648. 2489. 3770. 1755. 3683. 2267. 2080. 2457.  155.\n",
      " 3900. 2201. 1944. 2467. 2163. 2754. 2472. 1411.]\n",
      "[40. 13. 16. 45. 50. 80. 30. 35. 60. 20. 52. 44. 15. 25. 38. 43. 55. 48.\n",
      " 58. 32. 70.  2. 22. 56. 41. 28. 36. 24. 46. 42. 12. 65.  1. 10. 34. 75.\n",
      " 98. 33. 54.  8.  6. 64. 19. 18. 72.  5.  9. 47. 37. 21. 26. 14.  4. 59.\n",
      "  7. 99. 53. 39. 62. 57. 78. 90. 66. 11. 49. 84.  3. 17. 68. 27. 85. 31.\n",
      " 51. 77. 63. 23. 87. 88. 73. 89. 97. 94. 29. 96. 67. 82. 86. 91. 81. 76.\n",
      " 92. 61. 74. 95.]\n",
      "[' United-States' ' Cuba' ' Jamaica' ' India' ' ?' ' Mexico' ' South'\n",
      " ' Puerto-Rico' ' Honduras' ' England' ' Canada' ' Germany' ' Iran'\n",
      " ' Philippines' ' Italy' ' Poland' ' Columbia' ' Cambodia' ' Thailand'\n",
      " ' Ecuador' ' Laos' ' Taiwan' ' Haiti' ' Portugal' ' Dominican-Republic'\n",
      " ' El-Salvador' ' France' ' Guatemala' ' China' ' Japan' ' Yugoslavia'\n",
      " ' Peru' ' Outlying-US(Guam-USVI-etc)' ' Scotland' ' Trinadad&Tobago'\n",
      " ' Greece' ' Nicaragua' ' Vietnam' ' Hong' ' Ireland' ' Hungary'\n",
      " ' Holand-Netherlands']\n",
      "[' <=50K' ' >50K']\n"
     ]
    }
   ],
   "source": [
    "# Print the distinct values of every column to spot unusual entries.\n",
    "for column_name in df.columns:\n",
    "    print(df[column_name].unique())\n",
    "# Finding: workclass, occupation and native-country mark missing values with the ' ?' label."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9a9bee45-1bbd-4c4b-a1c3-061a44a74f7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 把字符串前面的空格去掉"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "355272d3-e962-4c4b-bbf6-a76f8ef7eaee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['State-gov',\n",
       " 'Self-emp-not-inc',\n",
       " 'Private',\n",
       " 'Federal-gov',\n",
       " 'Local-gov',\n",
       " '?',\n",
       " 'Self-emp-inc',\n",
       " 'Without-pay',\n",
       " 'Never-worked']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values of the second column (workclass) with surrounding whitespace stripped.\n",
    "new_1 = [label.strip() for label in df.iloc[:, 1].unique().tolist()]\n",
    "new_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad36e3d5-51dd-4ad8-b0b2-2df919fa34c6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de105f4f-e9ca-4db6-aced-fbd8902d8866",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2c3b87f-4278-4bb4-9f91-3639626fccc2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9cfbf23-5a25-4afb-bd12-54feaaec34d9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "588eb00e-59de-4735-ba0c-025a601076f2",
   "metadata": {},
   "source": [
    "# 填补workclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b78d27d8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>54.0</td>\n",
       "      <td>?</td>\n",
       "      <td>180211.0</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>?</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>South</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>32.0</td>\n",
       "      <td>?</td>\n",
       "      <td>293936.0</td>\n",
       "      <td>7th-8th</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Married-spouse-absent</td>\n",
       "      <td>?</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>25.0</td>\n",
       "      <td>?</td>\n",
       "      <td>200681.0</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>?</td>\n",
       "      <td>Own-child</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>67.0</td>\n",
       "      <td>?</td>\n",
       "      <td>212759.0</td>\n",
       "      <td>10th</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>?</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>17.0</td>\n",
       "      <td>?</td>\n",
       "      <td>304873.0</td>\n",
       "      <td>10th</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>?</td>\n",
       "      <td>Own-child</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>34095.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      age workclass    fnlwgt      education  education-num  \\\n",
       "27   54.0         ?  180211.0   Some-college           10.0   \n",
       "61   32.0         ?  293936.0        7th-8th            4.0   \n",
       "69   25.0         ?  200681.0   Some-college           10.0   \n",
       "77   67.0         ?  212759.0           10th            6.0   \n",
       "106  17.0         ?  304873.0           10th            6.0   \n",
       "\n",
       "             marital-status occupation    relationship                 race  \\\n",
       "27       Married-civ-spouse          ?         Husband   Asian-Pac-Islander   \n",
       "61    Married-spouse-absent          ?   Not-in-family                White   \n",
       "69            Never-married          ?       Own-child                White   \n",
       "77       Married-civ-spouse          ?         Husband                White   \n",
       "106           Never-married          ?       Own-child                White   \n",
       "\n",
       "         sex  capital-gain  capital-loss  hours-per-week  native-country  \\\n",
       "27      Male           0.0           0.0            60.0           South   \n",
       "61      Male           0.0           0.0            40.0               ?   \n",
       "69      Male           0.0           0.0            40.0   United-States   \n",
       "77      Male           0.0           0.0             2.0   United-States   \n",
       "106   Female       34095.0           0.0            32.0   United-States   \n",
       "\n",
       "    Listing of attributes  \n",
       "27                   >50K  \n",
       "61                  <=50K  \n",
       "69                  <=50K  \n",
       "77                  <=50K  \n",
       "106                 <=50K  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first rows whose workclass is the ' ?' missing-value placeholder.\n",
    "# (The stray leading space before the statement was removed: it is an IndentationError outside IPython.)\n",
    "df.loc[df.workclass == ' ?'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1c6a86f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1836, 15)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape of the subset whose workclass is the ' ?' placeholder: 1836 rows are affected.\n",
    "df[df['workclass'] == ' ?'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "670483cb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>31.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>45781.0</td>\n",
       "      <td>Masters</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>14084.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>42.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>159449.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>5178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>30.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>188146.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Machine-op-inspct</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>5013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>30.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>59496.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2407.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32515</th>\n",
       "      <td>66.0</td>\n",
       "      <td>Federal-gov</td>\n",
       "      <td>47358.0</td>\n",
       "      <td>10th</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Craft-repair</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>3471.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32518</th>\n",
       "      <td>57.0</td>\n",
       "      <td>Local-gov</td>\n",
       "      <td>110417.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Craft-repair</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>99999.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32538</th>\n",
       "      <td>38.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>139180.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>15020.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32548</th>\n",
       "      <td>65.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>99359.0</td>\n",
       "      <td>Prof-school</td>\n",
       "      <td>15.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>1086.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32560</th>\n",
       "      <td>52.0</td>\n",
       "      <td>Self-emp-inc</td>\n",
       "      <td>287927.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Wife</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2712 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age          workclass    fnlwgt     education  education-num  \\\n",
       "0      39.0          State-gov   77516.0     Bachelors           13.0   \n",
       "8      31.0            Private   45781.0       Masters           14.0   \n",
       "9      42.0            Private  159449.0     Bachelors           13.0   \n",
       "59     30.0            Private  188146.0       HS-grad            9.0   \n",
       "60     30.0            Private   59496.0     Bachelors           13.0   \n",
       "...     ...                ...       ...           ...            ...   \n",
       "32515  66.0        Federal-gov   47358.0          10th            6.0   \n",
       "32518  57.0          Local-gov  110417.0       HS-grad            9.0   \n",
       "32538  38.0            Private  139180.0     Bachelors           13.0   \n",
       "32548  65.0   Self-emp-not-inc   99359.0   Prof-school           15.0   \n",
       "32560  52.0       Self-emp-inc  287927.0       HS-grad            9.0   \n",
       "\n",
       "            marital-status          occupation    relationship    race  \\\n",
       "0            Never-married        Adm-clerical   Not-in-family   White   \n",
       "8            Never-married      Prof-specialty   Not-in-family   White   \n",
       "9       Married-civ-spouse     Exec-managerial         Husband   White   \n",
       "59      Married-civ-spouse   Machine-op-inspct         Husband   White   \n",
       "60      Married-civ-spouse               Sales         Husband   White   \n",
       "...                    ...                 ...             ...     ...   \n",
       "32515   Married-civ-spouse        Craft-repair         Husband   White   \n",
       "32518   Married-civ-spouse        Craft-repair         Husband   White   \n",
       "32538             Divorced      Prof-specialty       Unmarried   Black   \n",
       "32548        Never-married      Prof-specialty   Not-in-family   White   \n",
       "32560   Married-civ-spouse     Exec-managerial            Wife   White   \n",
       "\n",
       "           sex  capital-gain  capital-loss  hours-per-week  native-country  \\\n",
       "0         Male        2174.0           0.0            40.0   United-States   \n",
       "8       Female       14084.0           0.0            50.0   United-States   \n",
       "9         Male        5178.0           0.0            40.0   United-States   \n",
       "59        Male        5013.0           0.0            40.0   United-States   \n",
       "60        Male        2407.0           0.0            40.0   United-States   \n",
       "...        ...           ...           ...             ...             ...   \n",
       "32515     Male        3471.0           0.0            40.0   United-States   \n",
       "32518     Male       99999.0           0.0            40.0   United-States   \n",
       "32538   Female       15020.0           0.0            45.0   United-States   \n",
       "32548     Male        1086.0           0.0            60.0   United-States   \n",
       "32560   Female       15024.0           0.0            40.0   United-States   \n",
       "\n",
       "      Listing of attributes  \n",
       "0                     <=50K  \n",
       "8                      >50K  \n",
       "9                      >50K  \n",
       "59                    <=50K  \n",
       "60                    <=50K  \n",
       "...                     ...  \n",
       "32515                 <=50K  \n",
       "32518                  >50K  \n",
       "32538                  >50K  \n",
       "32548                 <=50K  \n",
       "32560                  >50K  \n",
       "\n",
       "[2712 rows x 15 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the rows whose capital-gain is not 0.\n",
    "# Only 2712 rows qualify; capital-gain is 0 for most records (author's reading: few people invest in stocks).\n",
    "df[df['capital-gain'].ne(0.0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "1c687089-4bf0-420e-a72f-31f020b4b82b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop fnlwgt, education-num, native-country and 'Listing of attributes'.\n",
    "# The columns are named through the `columns=` keyword: passing the axis\n",
    "# positionally (`df.drop(labels, 1)`) is deprecated and raises TypeError on pandas >= 2.0.\n",
    "df_1 = df.drop(columns=['fnlwgt', 'education-num', 'native-country', 'Listing of attributes'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "b5d75c58-2232-4152-85be-e1cc4705bfee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>11th</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age          workclass   education       marital-status  \\\n",
       "0  39.0          State-gov   Bachelors        Never-married   \n",
       "1  50.0   Self-emp-not-inc   Bachelors   Married-civ-spouse   \n",
       "2  38.0            Private     HS-grad             Divorced   \n",
       "3  53.0            Private        11th   Married-civ-spouse   \n",
       "4  28.0            Private   Bachelors   Married-civ-spouse   \n",
       "\n",
       "           occupation    relationship    race      sex  capital-gain  \\\n",
       "0        Adm-clerical   Not-in-family   White     Male        2174.0   \n",
       "1     Exec-managerial         Husband   White     Male           0.0   \n",
       "2   Handlers-cleaners   Not-in-family   White     Male           0.0   \n",
       "3   Handlers-cleaners         Husband   Black     Male           0.0   \n",
       "4      Prof-specialty            Wife   Black   Female           0.0   \n",
       "\n",
       "   capital-loss  hours-per-week  \n",
       "0           0.0            40.0  \n",
       "1           0.0            13.0  \n",
       "2           0.0            40.0  \n",
       "3           0.0            40.0  \n",
       "4           0.0            40.0  "
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the reduced frame\n",
    "df_1.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "81fec7b8-6878-4b92-8bd2-4bb37dc66ac0",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39. 50. 38. 53. 28. 37. 49. 52. 31. 42. 30. 23. 32. 40. 34. 25. 43. 54.\n",
      " 35. 59. 56. 19. 20. 45. 22. 48. 21. 24. 57. 44. 41. 29. 18. 47. 46. 36.\n",
      " 79. 27. 67. 33. 76. 17. 55. 61. 70. 64. 71. 68. 66. 51. 58. 26. 60. 90.\n",
      " 75. 65. 77. 62. 63. 80. 72. 74. 69. 73. 81. 78. 88. 82. 83. 84. 85. 86.\n",
      " 87.]\n",
      "[' State-gov' ' Self-emp-not-inc' ' Private' ' Federal-gov' ' Local-gov'\n",
      " ' ?' ' Self-emp-inc' ' Without-pay' ' Never-worked']\n",
      "[' Bachelors' ' HS-grad' ' 11th' ' Masters' ' 9th' ' Some-college'\n",
      " ' Assoc-acdm' ' Assoc-voc' ' 7th-8th' ' Doctorate' ' Prof-school'\n",
      " ' 5th-6th' ' 10th' ' 1st-4th' ' Preschool' ' 12th']\n",
      "[' Never-married' ' Married-civ-spouse' ' Divorced'\n",
      " ' Married-spouse-absent' ' Separated' ' Married-AF-spouse' ' Widowed']\n",
      "[' Adm-clerical' ' Exec-managerial' ' Handlers-cleaners' ' Prof-specialty'\n",
      " ' Other-service' ' Sales' ' Craft-repair' ' Transport-moving'\n",
      " ' Farming-fishing' ' Machine-op-inspct' ' Tech-support' ' ?'\n",
      " ' Protective-serv' ' Armed-Forces' ' Priv-house-serv']\n",
      "[' Not-in-family' ' Husband' ' Wife' ' Own-child' ' Unmarried'\n",
      " ' Other-relative']\n",
      "[' White' ' Black' ' Asian-Pac-Islander' ' Amer-Indian-Eskimo' ' Other']\n",
      "[' Male' ' Female']\n",
      "[ 2174.     0. 14084.  5178.  5013.  2407. 14344. 15024.  7688. 34095.\n",
      "  4064.  4386.  7298.  1409.  3674.  1055.  3464.  2050.  2176.   594.\n",
      " 20051.  6849.  4101.  1111.  8614.  3411.  2597. 25236.  4650.  9386.\n",
      "  2463.  3103. 10605.  2964.  3325.  2580.  3471.  4865. 99999.  6514.\n",
      "  1471.  2329.  2105.  2885. 25124. 10520.  2202.  2961. 27828.  6767.\n",
      "  2228.  1506. 13550.  2635.  5556.  4787.  3781.  3137.  3818.  3942.\n",
      "   914.   401.  2829.  2977.  4934.  2062.  2354.  5455. 15020.  1424.\n",
      "  3273. 22040.  4416.  3908. 10566.   991.  4931.  1086.  7430.  6497.\n",
      "   114.  7896.  2346.  3418.  3432.  2907.  1151.  2414.  2290. 15831.\n",
      " 41310.  4508.  2538.  3456.  6418.  1848.  3887.  5721.  9562.  1455.\n",
      "  2036.  1831. 11678.  2936.  2993.  7443.  6360.  1797.  1173.  4687.\n",
      "  6723.  2009.  6097.  2653.  1639. 18481.  7978.  2387.  5060.]\n",
      "[   0. 2042. 1408. 1902. 1573. 1887. 1719. 1762. 1564. 2179. 1816. 1980.\n",
      " 1977. 1876. 1340. 2206. 1741. 1485. 2339. 2415. 1380. 1721. 2051. 2377.\n",
      " 1669. 2352. 1672.  653. 2392. 1504. 2001. 1590. 1651. 1628. 1848. 1740.\n",
      " 2002. 1579. 2258. 1602.  419. 2547. 2174. 2205. 1726. 2444. 1138. 2238.\n",
      "  625.  213. 1539.  880. 1668. 1092. 1594. 3004. 2231. 1844.  810. 2824.\n",
      " 2559. 2057. 1974.  974. 2149. 1825. 1735. 1258. 2129. 2603. 2282.  323.\n",
      " 4356. 2246. 1617. 1648. 2489. 3770. 1755. 3683. 2267. 2080. 2457.  155.\n",
      " 3900. 2201. 1944. 2467. 2163. 2754. 2472. 1411.]\n",
      "[40. 13. 16. 45. 50. 80. 30. 35. 60. 20. 52. 44. 15. 25. 38. 43. 55. 48.\n",
      " 58. 32. 70.  2. 22. 56. 41. 28. 36. 24. 46. 42. 12. 65.  1. 10. 34. 75.\n",
      " 98. 33. 54.  8.  6. 64. 19. 18. 72.  5.  9. 47. 37. 21. 26. 14.  4. 59.\n",
      "  7. 99. 53. 39. 62. 57. 78. 90. 66. 11. 49. 84.  3. 17. 68. 27. 85. 31.\n",
      " 51. 77. 63. 23. 87. 88. 73. 89. 97. 94. 29. 96. 67. 82. 86. 91. 81. 76.\n",
      " 92. 61. 74. 95.]\n"
     ]
    }
   ],
   "source": [
    "# Print the distinct values found in each column of df_1, one array per column\n",
    "for column_name in df_1.columns:\n",
    "    print(df_1[column_name].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "51333254-2e03-42a1-be6a-2aeab60a908c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the categorical columns of df_1 as integer codes; rows whose value is '?' have to be set aside first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "3fc9fdf3-716d-404c-b39c-43c6db9dfd75",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>workclass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>State-gov</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32556</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32557</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32558</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32559</th>\n",
       "      <td>Private</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32560</th>\n",
       "      <td>Self-emp-inc</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30725 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               workclass\n",
       "0              State-gov\n",
       "1       Self-emp-not-inc\n",
       "2                Private\n",
       "3                Private\n",
       "4                Private\n",
       "...                  ...\n",
       "32556            Private\n",
       "32557            Private\n",
       "32558            Private\n",
       "32559            Private\n",
       "32560       Self-emp-inc\n",
       "\n",
       "[30725 rows x 1 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-column frame holding workclass for the rows whose original value is not ' ?'\n",
    "# (the mask is computed on df; df_1 shares its index)\n",
    "wk = pd.DataFrame({'workclass': df_1.loc[df['workclass'] != ' ?', 'workclass']})\n",
    "wk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "c4ebbd24-cde2-4b47-be3b-6f9a64e33bb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[' State-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Without-pay',\n",
       " ' Never-worked']"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct workclass labels in order of first appearance; a label's position is used as its code\n",
    "label_1 = wk.workclass.unique().tolist()\n",
    "label_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "e63932c8-486b-47a7-b89f-8d7c73bfc722",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace every workclass label by its position in label_1\n",
    "wk['workclass'] = wk['workclass'].map(label_1.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "363ae08c-6e09-4c02-983a-4bf4b29fb36e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>workclass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   workclass\n",
       "0          0\n",
       "1          1\n",
       "2          2\n",
       "3          2\n",
       "4          2"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wk.head(5)  # the workclass values (rows with ' ?' excluded) are now numeric codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "9ee15469-106d-401d-9d51-6db011f6a2d3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' Self-emp-not-inc'"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Workclass of the row labelled 1 in df_1\n",
    "df_1['workclass'].loc[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "041e2733-8e8b-40a0-8963-d87afa51eab2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Encoded workclass of the row labelled 1 in wk\n",
    "wk.loc[1, 'workclass']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "f0521e8a-29b2-4ce5-9f7c-7a0e9088f8f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the encoded workclass values from wk back into df_1.\n",
    "# One index-aligned assignment replaces the former row-by-row Python loop of scalar .loc writes;\n",
    "# rows that are not in wk (workclass == ' ?') are left unchanged.\n",
    "df_1.loc[wk.index, 'workclass'] = wk['workclass']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "f9eb172a-1383-4e7e-9886-ce4bebf9dd77",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11th</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age workclass   education       marital-status          occupation  \\\n",
       "0  39.0         0   Bachelors        Never-married        Adm-clerical   \n",
       "1  50.0         1   Bachelors   Married-civ-spouse     Exec-managerial   \n",
       "2  38.0         2     HS-grad             Divorced   Handlers-cleaners   \n",
       "3  53.0         2        11th   Married-civ-spouse   Handlers-cleaners   \n",
       "4  28.0         2   Bachelors   Married-civ-spouse      Prof-specialty   \n",
       "\n",
       "     relationship    race      sex  capital-gain  capital-loss  hours-per-week  \n",
       "0   Not-in-family   White     Male        2174.0           0.0            40.0  \n",
       "1         Husband   White     Male           0.0           0.0            13.0  \n",
       "2   Not-in-family   White     Male           0.0           0.0            40.0  \n",
       "3         Husband   Black     Male           0.0           0.0            40.0  \n",
       "4            Wife   Black   Female           0.0           0.0            40.0  "
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.head(5)  # every workclass value except ' ?' is now a numeric code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bff32549-9361-4cfb-abdc-4803142a61ac",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e504a8c9-7751-4a64-95b9-b4d9032750fb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "064afd79-dc31-46e0-a969-6c2b14a26ccf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3, 4, 5, 6, 7], dtype=object)"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct workclass codes now stored in df_1 for the rows whose original value was not ' ?'\n",
    "df_1['workclass'][df['workclass'] != ' ?'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "33ec5227-3ca3-4b72-b8b7-9ad9e908d47d",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0,\n",
       " 1,\n",
       " 2,\n",
       " 3,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 7,\n",
       " 8,\n",
       " 9,\n",
       " 10,\n",
       " 11,\n",
       " 12,\n",
       " 13,\n",
       " 14,\n",
       " 15,\n",
       " 16,\n",
       " 17,\n",
       " 18,\n",
       " 19,\n",
       " 20,\n",
       " 21,\n",
       " 22,\n",
       " 23,\n",
       " 24,\n",
       " 25,\n",
       " 26,\n",
       " 28,\n",
       " 29,\n",
       " 30,\n",
       " 31,\n",
       " 32,\n",
       " 33,\n",
       " 34,\n",
       " 35,\n",
       " 36,\n",
       " 37,\n",
       " 38,\n",
       " 39,\n",
       " 40,\n",
       " 41,\n",
       " 42,\n",
       " 43,\n",
       " 44,\n",
       " 45,\n",
       " 46,\n",
       " 47,\n",
       " 48,\n",
       " 49,\n",
       " 50,\n",
       " 51,\n",
       " 52,\n",
       " 53,\n",
       " 54,\n",
       " 55,\n",
       " 56,\n",
       " 57,\n",
       " 58,\n",
       " 59,\n",
       " 60,\n",
       " 62,\n",
       " 63,\n",
       " 64,\n",
       " 65,\n",
       " 66,\n",
       " 67,\n",
       " 68,\n",
       " 70,\n",
       " 71,\n",
       " 72,\n",
       " 73,\n",
       " 74,\n",
       " 75,\n",
       " 76,\n",
       " 78,\n",
       " 79,\n",
       " 80,\n",
       " 81,\n",
       " 82,\n",
       " 83,\n",
       " 84,\n",
       " 85,\n",
       " 86,\n",
       " 87,\n",
       " 88,\n",
       " 89,\n",
       " 90,\n",
       " 91,\n",
       " 92,\n",
       " 93,\n",
       " 94,\n",
       " 95,\n",
       " 96,\n",
       " 97,\n",
       " 98,\n",
       " 99,\n",
       " 100,\n",
       " 101,\n",
       " 102,\n",
       " 103,\n",
       " 104,\n",
       " 105,\n",
       " 107,\n",
       " 108,\n",
       " 109,\n",
       " 110,\n",
       " 111,\n",
       " 112,\n",
       " 113,\n",
       " 114,\n",
       " 115,\n",
       " 116,\n",
       " 117,\n",
       " 118,\n",
       " 119,\n",
       " 120,\n",
       " 121,\n",
       " 122,\n",
       " 123,\n",
       " 124,\n",
       " 125,\n",
       " 126,\n",
       " 127,\n",
       " 129,\n",
       " 130,\n",
       " 131,\n",
       " 132,\n",
       " 133,\n",
       " 134,\n",
       " 135,\n",
       " 136,\n",
       " 137,\n",
       " 138,\n",
       " 139,\n",
       " 140,\n",
       " 141,\n",
       " 142,\n",
       " 143,\n",
       " 144,\n",
       " 145,\n",
       " 146,\n",
       " 147,\n",
       " 148,\n",
       " 150,\n",
       " 151,\n",
       " 152,\n",
       " 153,\n",
       " 155,\n",
       " 156,\n",
       " 157,\n",
       " 158,\n",
       " 159,\n",
       " 161,\n",
       " 162,\n",
       " 163,\n",
       " 164,\n",
       " 165,\n",
       " 166,\n",
       " 167,\n",
       " 168,\n",
       " 169,\n",
       " 170,\n",
       " 171,\n",
       " 172,\n",
       " 173,\n",
       " 174,\n",
       " 175,\n",
       " 176,\n",
       " 177,\n",
       " 178,\n",
       " 179,\n",
       " 180,\n",
       " 181,\n",
       " 182,\n",
       " 183,\n",
       " 184,\n",
       " 185,\n",
       " 186,\n",
       " 188,\n",
       " 189,\n",
       " 190,\n",
       " 191,\n",
       " 192,\n",
       " 193,\n",
       " 194,\n",
       " 195,\n",
       " 196,\n",
       " 197,\n",
       " 198,\n",
       " 199,\n",
       " 200,\n",
       " 202,\n",
       " 203,\n",
       " 204,\n",
       " 205,\n",
       " 206,\n",
       " 207,\n",
       " 208,\n",
       " 209,\n",
       " 210,\n",
       " 211,\n",
       " 212,\n",
       " 213,\n",
       " 214,\n",
       " 215,\n",
       " 216,\n",
       " 217,\n",
       " 218,\n",
       " 219,\n",
       " 220,\n",
       " 222,\n",
       " 223,\n",
       " 224,\n",
       " 225,\n",
       " 227,\n",
       " 228,\n",
       " 229,\n",
       " 230,\n",
       " 231,\n",
       " 232,\n",
       " 233,\n",
       " 234,\n",
       " 235,\n",
       " 236,\n",
       " 237,\n",
       " 238,\n",
       " 239,\n",
       " 240,\n",
       " 241,\n",
       " 242,\n",
       " 244,\n",
       " 245,\n",
       " 246,\n",
       " 247,\n",
       " 248,\n",
       " 249,\n",
       " 250,\n",
       " 251,\n",
       " 252,\n",
       " 253,\n",
       " 254,\n",
       " 255,\n",
       " 256,\n",
       " 257,\n",
       " 258,\n",
       " 259,\n",
       " 260,\n",
       " 261,\n",
       " 262,\n",
       " 263,\n",
       " 264,\n",
       " 265,\n",
       " 267,\n",
       " 268,\n",
       " 269,\n",
       " 270,\n",
       " 271,\n",
       " 272,\n",
       " 273,\n",
       " 274,\n",
       " 275,\n",
       " 276,\n",
       " 277,\n",
       " 278,\n",
       " 279,\n",
       " 280,\n",
       " 281,\n",
       " 282,\n",
       " 283,\n",
       " 284,\n",
       " 285,\n",
       " 286,\n",
       " 287,\n",
       " 288,\n",
       " 289,\n",
       " 290,\n",
       " 291,\n",
       " 292,\n",
       " 293,\n",
       " 294,\n",
       " 295,\n",
       " 296,\n",
       " 298,\n",
       " 299,\n",
       " 300,\n",
       " 301,\n",
       " 302,\n",
       " 303,\n",
       " 304,\n",
       " 305,\n",
       " 306,\n",
       " 307,\n",
       " 308,\n",
       " 309,\n",
       " 310,\n",
       " 311,\n",
       " 313,\n",
       " 314,\n",
       " 315,\n",
       " 316,\n",
       " 317,\n",
       " 318,\n",
       " 319,\n",
       " 320,\n",
       " 321,\n",
       " 322,\n",
       " 323,\n",
       " 324,\n",
       " 325,\n",
       " 327,\n",
       " 328,\n",
       " 329,\n",
       " 330,\n",
       " 331,\n",
       " 332,\n",
       " 333,\n",
       " 334,\n",
       " 335,\n",
       " 336,\n",
       " 337,\n",
       " 338,\n",
       " 339,\n",
       " 340,\n",
       " 341,\n",
       " 342,\n",
       " 343,\n",
       " 344,\n",
       " 345,\n",
       " 348,\n",
       " 349,\n",
       " 350,\n",
       " 351,\n",
       " 352,\n",
       " 353,\n",
       " 355,\n",
       " 356,\n",
       " 357,\n",
       " 358,\n",
       " 359,\n",
       " 360,\n",
       " 361,\n",
       " 362,\n",
       " 363,\n",
       " 364,\n",
       " 365,\n",
       " 366,\n",
       " 367,\n",
       " 368,\n",
       " 369,\n",
       " 370,\n",
       " 371,\n",
       " 372,\n",
       " 373,\n",
       " 374,\n",
       " 375,\n",
       " 376,\n",
       " 377,\n",
       " 378,\n",
       " 379,\n",
       " 380,\n",
       " 381,\n",
       " 382,\n",
       " 383,\n",
       " 384,\n",
       " 385,\n",
       " 386,\n",
       " 387,\n",
       " 388,\n",
       " 389,\n",
       " 390,\n",
       " 391,\n",
       " 392,\n",
       " 393,\n",
       " 394,\n",
       " 395,\n",
       " 396,\n",
       " 398,\n",
       " 399,\n",
       " 400,\n",
       " 401,\n",
       " 402,\n",
       " 403,\n",
       " 404,\n",
       " 405,\n",
       " 406,\n",
       " 407,\n",
       " 409,\n",
       " 410,\n",
       " 411,\n",
       " 412,\n",
       " 413,\n",
       " 414,\n",
       " 415,\n",
       " 416,\n",
       " 417,\n",
       " 418,\n",
       " 419,\n",
       " 420,\n",
       " 421,\n",
       " 422,\n",
       " 423,\n",
       " 424,\n",
       " 425,\n",
       " 426,\n",
       " 427,\n",
       " 428,\n",
       " 429,\n",
       " 432,\n",
       " 433,\n",
       " 434,\n",
       " 435,\n",
       " 436,\n",
       " 437,\n",
       " 438,\n",
       " 439,\n",
       " 440,\n",
       " 441,\n",
       " 442,\n",
       " 443,\n",
       " 444,\n",
       " 445,\n",
       " 446,\n",
       " 447,\n",
       " 448,\n",
       " 450,\n",
       " 451,\n",
       " 452,\n",
       " 453,\n",
       " 454,\n",
       " 455,\n",
       " 456,\n",
       " 457,\n",
       " 458,\n",
       " 460,\n",
       " 461,\n",
       " 462,\n",
       " 463,\n",
       " 464,\n",
       " 465,\n",
       " 466,\n",
       " 467,\n",
       " 468,\n",
       " 469,\n",
       " 470,\n",
       " 472,\n",
       " 473,\n",
       " 474,\n",
       " 475,\n",
       " 476,\n",
       " 477,\n",
       " 478,\n",
       " 479,\n",
       " 480,\n",
       " 481,\n",
       " 482,\n",
       " 483,\n",
       " 485,\n",
       " 487,\n",
       " 488,\n",
       " 489,\n",
       " 490,\n",
       " 491,\n",
       " 492,\n",
       " 493,\n",
       " 494,\n",
       " 495,\n",
       " 496,\n",
       " 497,\n",
       " 498,\n",
       " 500,\n",
       " 501,\n",
       " 502,\n",
       " 503,\n",
       " 504,\n",
       " 505,\n",
       " 506,\n",
       " 507,\n",
       " 508,\n",
       " 509,\n",
       " 510,\n",
       " 512,\n",
       " 513,\n",
       " 514,\n",
       " 516,\n",
       " 519,\n",
       " 520,\n",
       " 521,\n",
       " 522,\n",
       " 523,\n",
       " 524,\n",
       " 525,\n",
       " 526,\n",
       " 527,\n",
       " 528,\n",
       " 529,\n",
       " 530,\n",
       " 531,\n",
       " 532,\n",
       " 533,\n",
       " 534,\n",
       " 535,\n",
       " 536,\n",
       " 537,\n",
       " 538,\n",
       " 540,\n",
       " 541,\n",
       " 542,\n",
       " 543,\n",
       " 544,\n",
       " 545,\n",
       " 546,\n",
       " 547,\n",
       " 548,\n",
       " 549,\n",
       " 550,\n",
       " 551,\n",
       " 552,\n",
       " 553,\n",
       " 554,\n",
       " 555,\n",
       " 556,\n",
       " 557,\n",
       " 558,\n",
       " 559,\n",
       " 560,\n",
       " 561,\n",
       " 562,\n",
       " 563,\n",
       " 564,\n",
       " 565,\n",
       " 566,\n",
       " 567,\n",
       " 568,\n",
       " 569,\n",
       " 570,\n",
       " 571,\n",
       " 572,\n",
       " 573,\n",
       " 574,\n",
       " 575,\n",
       " 577,\n",
       " 578,\n",
       " 579,\n",
       " 581,\n",
       " 582,\n",
       " 583,\n",
       " 584,\n",
       " 585,\n",
       " 586,\n",
       " 587,\n",
       " 588,\n",
       " 589,\n",
       " 592,\n",
       " 593,\n",
       " 594,\n",
       " 595,\n",
       " 597,\n",
       " 598,\n",
       " 599,\n",
       " 600,\n",
       " 601,\n",
       " 602,\n",
       " 603,\n",
       " 604,\n",
       " 605,\n",
       " 606,\n",
       " 607,\n",
       " 608,\n",
       " 609,\n",
       " 610,\n",
       " 611,\n",
       " 612,\n",
       " 613,\n",
       " 614,\n",
       " 615,\n",
       " 616,\n",
       " 617,\n",
       " 618,\n",
       " 619,\n",
       " 620,\n",
       " 621,\n",
       " 622,\n",
       " 623,\n",
       " 624,\n",
       " 625,\n",
       " 626,\n",
       " 627,\n",
       " 628,\n",
       " 629,\n",
       " 630,\n",
       " 631,\n",
       " 632,\n",
       " 633,\n",
       " 634,\n",
       " 635,\n",
       " 636,\n",
       " 637,\n",
       " 638,\n",
       " 639,\n",
       " 640,\n",
       " 641,\n",
       " 642,\n",
       " 643,\n",
       " 644,\n",
       " 645,\n",
       " 646,\n",
       " 647,\n",
       " 649,\n",
       " 650,\n",
       " 651,\n",
       " 652,\n",
       " 653,\n",
       " 654,\n",
       " 655,\n",
       " 657,\n",
       " 658,\n",
       " 659,\n",
       " 660,\n",
       " 661,\n",
       " 662,\n",
       " 663,\n",
       " 664,\n",
       " 665,\n",
       " 666,\n",
       " 667,\n",
       " 669,\n",
       " 670,\n",
       " 672,\n",
       " 673,\n",
       " 674,\n",
       " 675,\n",
       " 676,\n",
       " 677,\n",
       " 678,\n",
       " 679,\n",
       " 680,\n",
       " 681,\n",
       " 682,\n",
       " 683,\n",
       " 684,\n",
       " 685,\n",
       " 687,\n",
       " 688,\n",
       " 689,\n",
       " 690,\n",
       " 691,\n",
       " 692,\n",
       " 693,\n",
       " 694,\n",
       " 696,\n",
       " 697,\n",
       " 698,\n",
       " 699,\n",
       " 700,\n",
       " 701,\n",
       " 702,\n",
       " 703,\n",
       " 704,\n",
       " 705,\n",
       " 706,\n",
       " 707,\n",
       " 708,\n",
       " 709,\n",
       " 710,\n",
       " 711,\n",
       " 712,\n",
       " 713,\n",
       " 714,\n",
       " 715,\n",
       " 716,\n",
       " 717,\n",
       " 718,\n",
       " 719,\n",
       " 720,\n",
       " 721,\n",
       " 722,\n",
       " 723,\n",
       " 724,\n",
       " 725,\n",
       " 726,\n",
       " 727,\n",
       " 728,\n",
       " 729,\n",
       " 730,\n",
       " 731,\n",
       " 732,\n",
       " 733,\n",
       " 735,\n",
       " 736,\n",
       " 737,\n",
       " 738,\n",
       " 739,\n",
       " 740,\n",
       " 741,\n",
       " 742,\n",
       " 743,\n",
       " 744,\n",
       " 745,\n",
       " 746,\n",
       " 747,\n",
       " 748,\n",
       " 749,\n",
       " 750,\n",
       " 751,\n",
       " 752,\n",
       " 753,\n",
       " 754,\n",
       " 755,\n",
       " 756,\n",
       " 757,\n",
       " 758,\n",
       " 759,\n",
       " 760,\n",
       " 761,\n",
       " 762,\n",
       " 763,\n",
       " 764,\n",
       " 765,\n",
       " 766,\n",
       " 767,\n",
       " 768,\n",
       " 769,\n",
       " 770,\n",
       " 771,\n",
       " 772,\n",
       " 773,\n",
       " 774,\n",
       " 775,\n",
       " 776,\n",
       " 777,\n",
       " 778,\n",
       " 779,\n",
       " 780,\n",
       " 781,\n",
       " 782,\n",
       " 783,\n",
       " 784,\n",
       " 785,\n",
       " 786,\n",
       " 787,\n",
       " 790,\n",
       " 791,\n",
       " 793,\n",
       " 794,\n",
       " 795,\n",
       " 796,\n",
       " 797,\n",
       " 798,\n",
       " 799,\n",
       " 800,\n",
       " 801,\n",
       " 802,\n",
       " 803,\n",
       " 804,\n",
       " 805,\n",
       " 807,\n",
       " 808,\n",
       " 809,\n",
       " 810,\n",
       " 812,\n",
       " 813,\n",
       " 814,\n",
       " 815,\n",
       " 816,\n",
       " 817,\n",
       " 818,\n",
       " 819,\n",
       " 820,\n",
       " 821,\n",
       " 822,\n",
       " 823,\n",
       " 824,\n",
       " 825,\n",
       " 826,\n",
       " 827,\n",
       " 828,\n",
       " 829,\n",
       " 831,\n",
       " 832,\n",
       " 833,\n",
       " 835,\n",
       " 836,\n",
       " 837,\n",
       " 838,\n",
       " 839,\n",
       " 840,\n",
       " 841,\n",
       " 842,\n",
       " 843,\n",
       " 844,\n",
       " 845,\n",
       " 846,\n",
       " 847,\n",
       " 848,\n",
       " 849,\n",
       " 850,\n",
       " 851,\n",
       " 852,\n",
       " 853,\n",
       " 854,\n",
       " 855,\n",
       " 856,\n",
       " 857,\n",
       " 858,\n",
       " 859,\n",
       " 860,\n",
       " 861,\n",
       " 862,\n",
       " 863,\n",
       " 864,\n",
       " 865,\n",
       " 866,\n",
       " 867,\n",
       " 868,\n",
       " 869,\n",
       " 870,\n",
       " 871,\n",
       " 872,\n",
       " 873,\n",
       " 874,\n",
       " 875,\n",
       " 876,\n",
       " 877,\n",
       " 878,\n",
       " 879,\n",
       " 880,\n",
       " 881,\n",
       " 882,\n",
       " 883,\n",
       " 884,\n",
       " 886,\n",
       " 887,\n",
       " 888,\n",
       " 889,\n",
       " 891,\n",
       " 892,\n",
       " 893,\n",
       " 894,\n",
       " 895,\n",
       " 896,\n",
       " 897,\n",
       " 898,\n",
       " 899,\n",
       " 900,\n",
       " 901,\n",
       " 902,\n",
       " 904,\n",
       " 905,\n",
       " 906,\n",
       " 907,\n",
       " 908,\n",
       " 909,\n",
       " 910,\n",
       " 911,\n",
       " 912,\n",
       " 913,\n",
       " 914,\n",
       " 915,\n",
       " 916,\n",
       " 917,\n",
       " 918,\n",
       " 919,\n",
       " 920,\n",
       " 921,\n",
       " 922,\n",
       " 923,\n",
       " 925,\n",
       " 926,\n",
       " 927,\n",
       " 928,\n",
       " 929,\n",
       " 930,\n",
       " 932,\n",
       " 933,\n",
       " 934,\n",
       " 935,\n",
       " 936,\n",
       " 937,\n",
       " 938,\n",
       " 939,\n",
       " 940,\n",
       " 941,\n",
       " 942,\n",
       " 943,\n",
       " 944,\n",
       " 945,\n",
       " 946,\n",
       " 947,\n",
       " 948,\n",
       " 949,\n",
       " 950,\n",
       " 951,\n",
       " 952,\n",
       " 953,\n",
       " 954,\n",
       " 955,\n",
       " 956,\n",
       " 957,\n",
       " 958,\n",
       " 959,\n",
       " 960,\n",
       " 961,\n",
       " 962,\n",
       " 963,\n",
       " 964,\n",
       " 965,\n",
       " 966,\n",
       " 967,\n",
       " 968,\n",
       " 970,\n",
       " 971,\n",
       " 972,\n",
       " 973,\n",
       " 974,\n",
       " 975,\n",
       " 976,\n",
       " 977,\n",
       " 978,\n",
       " 979,\n",
       " 980,\n",
       " 981,\n",
       " 983,\n",
       " 984,\n",
       " 985,\n",
       " 986,\n",
       " 987,\n",
       " 988,\n",
       " 989,\n",
       " 990,\n",
       " 991,\n",
       " 992,\n",
       " 993,\n",
       " 994,\n",
       " 995,\n",
       " 996,\n",
       " 997,\n",
       " 998,\n",
       " 999,\n",
       " 1000,\n",
       " 1001,\n",
       " 1002,\n",
       " 1003,\n",
       " 1004,\n",
       " 1005,\n",
       " 1006,\n",
       " 1007,\n",
       " 1008,\n",
       " 1009,\n",
       " 1010,\n",
       " 1011,\n",
       " 1012,\n",
       " 1013,\n",
       " 1014,\n",
       " 1015,\n",
       " 1016,\n",
       " 1017,\n",
       " 1018,\n",
       " 1020,\n",
       " 1021,\n",
       " 1022,\n",
       " 1023,\n",
       " 1024,\n",
       " 1025,\n",
       " 1026,\n",
       " 1027,\n",
       " 1028,\n",
       " 1029,\n",
       " 1030,\n",
       " 1031,\n",
       " 1032,\n",
       " 1033,\n",
       " 1036,\n",
       " 1037,\n",
       " 1038,\n",
       " 1040,\n",
       " 1041,\n",
       " 1042,\n",
       " 1043,\n",
       " 1044,\n",
       " 1045,\n",
       " 1047,\n",
       " 1048,\n",
       " 1049,\n",
       " 1050,\n",
       " 1051,\n",
       " 1052,\n",
       " 1053,\n",
       " 1054,\n",
       " 1055,\n",
       " 1056,\n",
       " 1057,\n",
       " 1058,\n",
       " 1059,\n",
       " 1060,\n",
       " 1061,\n",
       " 1062,\n",
       " 1063,\n",
       " 1064,\n",
       " 1065,\n",
       " 1066,\n",
       " ...]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row labels of every record whose workclass in df is not the ' ?' placeholder.\n",
    "df_1.loc[df.workclass != ' ?', 'workclass'].index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94e41bdc-b7ef-4449-a08d-2fbf03dc0c68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the label column (LabelEncoder variant below is commented out)\n",
    "#from sklearn.preprocessing import LabelEncoder\n",
    "#df_1.iloc[:,1] = LabelEncoder().fit_transform(df_1.iloc[:,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "98c36560-db9a-41b7-af2f-54ed76401c6f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11th</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age workclass   education       marital-status          occupation  \\\n",
       "0  39.0         0   Bachelors        Never-married        Adm-clerical   \n",
       "1  50.0         1   Bachelors   Married-civ-spouse     Exec-managerial   \n",
       "2  38.0         2     HS-grad             Divorced   Handlers-cleaners   \n",
       "3  53.0         2        11th   Married-civ-spouse   Handlers-cleaners   \n",
       "4  28.0         2   Bachelors   Married-civ-spouse      Prof-specialty   \n",
       "\n",
       "     relationship    race      sex  capital-gain  capital-loss  hours-per-week  \n",
       "0   Not-in-family   White     Male        2174.0           0.0            40.0  \n",
       "1         Husband   White     Male           0.0           0.0            13.0  \n",
       "2   Not-in-family   White     Male           0.0           0.0            40.0  \n",
       "3         Husband   Black     Male           0.0           0.0            40.0  \n",
       "4            Wife   Black   Female           0.0           0.0            40.0  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "3ae2c8c4-8537-4359-9497-18da65eae9dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transform (encode) the feature columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "586429af-8a7a-402f-aaa2-2fccea5e45bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import OrdinalEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "128d742a-df54-481c-bf1b-452870a71c31",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal-encode the six columns at positions 2-7 of df_1.\n",
    "# The result is assigned by column label rather than through .iloc: label-based\n",
    "# assignment replaces each column with the encoder's numeric output, whereas\n",
    "# `df_1.iloc[:, 2:8] = ...` on pandas >= 2.0 writes the values in place and\n",
    "# leaves the columns with their original (string-holding) dtype.\n",
    "encoded_cols = list(df_1.columns[2:8])\n",
    "df_1[encoded_cols] = OrdinalEncoder().fit_transform(df_1[encoded_cols])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "72f2790f-3157-4ce1-8eb8-5f32b106dd31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>2</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32556</th>\n",
       "      <td>27.0</td>\n",
       "      <td>2</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32557</th>\n",
       "      <td>40.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32558</th>\n",
       "      <td>58.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32559</th>\n",
       "      <td>22.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32560</th>\n",
       "      <td>52.0</td>\n",
       "      <td>5</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>32561 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age workclass  education  marital-status  occupation  relationship  \\\n",
       "0      39.0         0        9.0             4.0         1.0           1.0   \n",
       "1      50.0         1        9.0             2.0         4.0           0.0   \n",
       "2      38.0         2       11.0             0.0         6.0           1.0   \n",
       "3      53.0         2        1.0             2.0         6.0           0.0   \n",
       "4      28.0         2        9.0             2.0        10.0           5.0   \n",
       "...     ...       ...        ...             ...         ...           ...   \n",
       "32556  27.0         2        7.0             2.0        13.0           5.0   \n",
       "32557  40.0         2       11.0             2.0         7.0           0.0   \n",
       "32558  58.0         2       11.0             6.0         1.0           4.0   \n",
       "32559  22.0         2       11.0             4.0         1.0           3.0   \n",
       "32560  52.0         5       11.0             2.0         4.0           5.0   \n",
       "\n",
       "       race  sex  capital-gain  capital-loss  hours-per-week  \n",
       "0       4.0  1.0        2174.0           0.0            40.0  \n",
       "1       4.0  1.0           0.0           0.0            13.0  \n",
       "2       4.0  1.0           0.0           0.0            40.0  \n",
       "3       2.0  1.0           0.0           0.0            40.0  \n",
       "4       2.0  0.0           0.0           0.0            40.0  \n",
       "...     ...  ...           ...           ...             ...  \n",
       "32556   4.0  0.0           0.0           0.0            38.0  \n",
       "32557   4.0  1.0           0.0           0.0            40.0  \n",
       "32558   4.0  0.0           0.0           0.0            40.0  \n",
       "32559   4.0  1.0           0.0           0.0            20.0  \n",
       "32560   4.0  0.0       15024.0           0.0            40.0  \n",
       "\n",
       "[32561 rows x 11 columns]"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1  # all categorical variables have now been processed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e9e2fa99-83e0-4cbc-96a3-461b420dcbd6",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>38.581647</td>\n",
       "      <td>13.640433</td>\n",
       "      <td>17.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>workclass</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>3.868892</td>\n",
       "      <td>1.455960</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>education</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>10.298210</td>\n",
       "      <td>3.870264</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>marital-status</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>2.611836</td>\n",
       "      <td>1.506222</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>occupation</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>6.572740</td>\n",
       "      <td>4.228857</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>relationship</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>1.446362</td>\n",
       "      <td>1.606771</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>race</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>3.665858</td>\n",
       "      <td>0.848806</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>0.669205</td>\n",
       "      <td>0.470506</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>capital-gain</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>1077.648844</td>\n",
       "      <td>7385.292085</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99999.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>capital-loss</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>87.303830</td>\n",
       "      <td>402.960219</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4356.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>hours-per-week</th>\n",
       "      <td>32561.0</td>\n",
       "      <td>40.437456</td>\n",
       "      <td>12.347429</td>\n",
       "      <td>1.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  count         mean          std   min   25%   50%   75%  \\\n",
       "age             32561.0    38.581647    13.640433  17.0  28.0  37.0  48.0   \n",
       "workclass       32561.0     3.868892     1.455960   0.0   4.0   4.0   4.0   \n",
       "education       32561.0    10.298210     3.870264   0.0   9.0  11.0  12.0   \n",
       "marital-status  32561.0     2.611836     1.506222   0.0   2.0   2.0   4.0   \n",
       "occupation      32561.0     6.572740     4.228857   0.0   3.0   7.0  10.0   \n",
       "relationship    32561.0     1.446362     1.606771   0.0   0.0   1.0   3.0   \n",
       "race            32561.0     3.665858     0.848806   0.0   4.0   4.0   4.0   \n",
       "sex             32561.0     0.669205     0.470506   0.0   0.0   1.0   1.0   \n",
       "capital-gain    32561.0  1077.648844  7385.292085   0.0   0.0   0.0   0.0   \n",
       "capital-loss    32561.0    87.303830   402.960219   0.0   0.0   0.0   0.0   \n",
       "hours-per-week  32561.0    40.437456    12.347429   1.0  40.0  40.0  45.0   \n",
       "\n",
       "                    max  \n",
       "age                90.0  \n",
       "workclass           8.0  \n",
       "education          15.0  \n",
       "marital-status      6.0  \n",
       "occupation         14.0  \n",
       "relationship        5.0  \n",
       "race                4.0  \n",
       "sex                 1.0  \n",
       "capital-gain    99999.0  \n",
       "capital-loss     4356.0  \n",
       "hours-per-week     99.0  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# capital-gain and capital-loss are too heavily skewed, so they are to be dropped.\n",
    "df_1.describe().transpose()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cbe1b615-9bb7-4143-a6cf-8c57b08bc8e4",
   "metadata": {},
   "source": [
    "## 划分训练集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "c00f1486-2504-41c9-af90-7b11b6031f6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training rows: records whose workclass in df is known (not ' ?').\n",
    "train = df_1.loc[df['workclass'].ne(' ?')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "5e5830e5-d5e3-4392-a4e7-61ffb2043992",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test rows: records whose workclass in df is the ' ?' placeholder.\n",
    "test = df_1.loc[df['workclass'].eq(' ?')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "9a3be155-615d-4a0b-9924-f9928c14e122",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Renumber both subsets so that each one is indexed 0..n-1.\n",
    "for subset in (train, test):\n",
    "    subset.index = pd.RangeIndex(len(subset))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "82ffd309-31d8-418a-96b9-877070f4c525",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>2</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30720</th>\n",
       "      <td>27.0</td>\n",
       "      <td>2</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30721</th>\n",
       "      <td>40.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30722</th>\n",
       "      <td>58.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30723</th>\n",
       "      <td>22.0</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30724</th>\n",
       "      <td>52.0</td>\n",
       "      <td>5</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30725 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age workclass  education  marital-status  occupation  relationship  \\\n",
       "0      39.0         0        9.0             4.0         1.0           1.0   \n",
       "1      50.0         1        9.0             2.0         4.0           0.0   \n",
       "2      38.0         2       11.0             0.0         6.0           1.0   \n",
       "3      53.0         2        1.0             2.0         6.0           0.0   \n",
       "4      28.0         2        9.0             2.0        10.0           5.0   \n",
       "...     ...       ...        ...             ...         ...           ...   \n",
       "30720  27.0         2        7.0             2.0        13.0           5.0   \n",
       "30721  40.0         2       11.0             2.0         7.0           0.0   \n",
       "30722  58.0         2       11.0             6.0         1.0           4.0   \n",
       "30723  22.0         2       11.0             4.0         1.0           3.0   \n",
       "30724  52.0         5       11.0             2.0         4.0           5.0   \n",
       "\n",
       "       race  sex  capital-gain  capital-loss  hours-per-week  \n",
       "0       4.0  1.0        2174.0           0.0            40.0  \n",
       "1       4.0  1.0           0.0           0.0            13.0  \n",
       "2       4.0  1.0           0.0           0.0            40.0  \n",
       "3       2.0  1.0           0.0           0.0            40.0  \n",
       "4       2.0  0.0           0.0           0.0            40.0  \n",
       "...     ...  ...           ...           ...             ...  \n",
       "30720   4.0  0.0           0.0           0.0            38.0  \n",
       "30721   4.0  1.0           0.0           0.0            40.0  \n",
       "30722   4.0  0.0           0.0           0.0            40.0  \n",
       "30723   4.0  1.0           0.0           0.0            20.0  \n",
       "30724   4.0  0.0       15024.0           0.0            40.0  \n",
       "\n",
       "[30725 rows x 11 columns]"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "1f9911f1-1db1-4db9-a334-a917cf1dfca4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30725, 11)"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "63cc010e-bd6b-441b-9ebb-89058091f822",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>54.0</td>\n",
       "      <td>?</td>\n",
       "      <td>15.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>32.0</td>\n",
       "      <td>?</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>25.0</td>\n",
       "      <td>?</td>\n",
       "      <td>15.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>67.0</td>\n",
       "      <td>?</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17.0</td>\n",
       "      <td>?</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>34095.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1831</th>\n",
       "      <td>35.0</td>\n",
       "      <td>?</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>55.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1832</th>\n",
       "      <td>30.0</td>\n",
       "      <td>?</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1833</th>\n",
       "      <td>71.0</td>\n",
       "      <td>?</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1834</th>\n",
       "      <td>41.0</td>\n",
       "      <td>?</td>\n",
       "      <td>11.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1835</th>\n",
       "      <td>72.0</td>\n",
       "      <td>?</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1836 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       age workclass  education  marital-status  occupation  relationship  \\\n",
       "0     54.0         ?       15.0             2.0         0.0           0.0   \n",
       "1     32.0         ?        5.0             3.0         0.0           1.0   \n",
       "2     25.0         ?       15.0             4.0         0.0           3.0   \n",
       "3     67.0         ?        0.0             2.0         0.0           0.0   \n",
       "4     17.0         ?        0.0             4.0         0.0           3.0   \n",
       "...    ...       ...        ...             ...         ...           ...   \n",
       "1831  35.0         ?        9.0             2.0         0.0           5.0   \n",
       "1832  30.0         ?        9.0             4.0         0.0           1.0   \n",
       "1833  71.0         ?       10.0             2.0         0.0           0.0   \n",
       "1834  41.0         ?       11.0             5.0         0.0           1.0   \n",
       "1835  72.0         ?       11.0             2.0         0.0           0.0   \n",
       "\n",
       "      race  sex  capital-gain  capital-loss  hours-per-week  \n",
       "0      1.0  1.0           0.0           0.0            60.0  \n",
       "1      4.0  1.0           0.0           0.0            40.0  \n",
       "2      4.0  1.0           0.0           0.0            40.0  \n",
       "3      4.0  1.0           0.0           0.0             2.0  \n",
       "4      4.0  0.0       34095.0           0.0            32.0  \n",
       "...    ...  ...           ...           ...             ...  \n",
       "1831   4.0  0.0           0.0           0.0            55.0  \n",
       "1832   1.0  0.0           0.0           0.0            99.0  \n",
       "1833   4.0  1.0           0.0           0.0            10.0  \n",
       "1834   2.0  0.0           0.0           0.0            32.0  \n",
       "1835   4.0  1.0           0.0           0.0            25.0  \n",
       "\n",
       "[1836 rows x 11 columns]"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "7723260a-db8a-4917-8903-578ab136a50a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1836, 11)"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2464b1fd-63e3-4593-8939-507f9c27d29c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "d87ce87b-2152-4ac4-8cc0-bc070590c6fa",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39. 50. 38. 53. 28. 37. 49. 52. 31. 42. 30. 23. 32. 40. 34. 25. 43. 54.\n",
      " 35. 59. 56. 19. 20. 45. 22. 48. 21. 24. 57. 44. 41. 29. 18. 47. 46. 36.\n",
      " 79. 27. 67. 33. 76. 17. 55. 61. 70. 64. 71. 68. 66. 51. 58. 26. 60. 90.\n",
      " 75. 65. 77. 62. 63. 80. 72. 74. 69. 73. 81. 78. 88. 82. 83. 84. 85. 86.\n",
      " 87.]\n",
      "[0 1 2 3 4 ' ?' 5 6 7]\n",
      "[ 9. 11.  1. 12.  6. 15.  7.  8.  5. 10. 14.  4.  0.  3. 13.  2.]\n",
      "[4. 2. 0. 3. 5. 1. 6.]\n",
      "[ 1.  4.  6. 10.  8. 12.  3. 14.  5.  7. 13.  0. 11.  2.  9.]\n",
      "[1. 0. 5. 3. 4. 2.]\n",
      "[4. 2. 1. 0. 3.]\n",
      "[1. 0.]\n",
      "[ 2174.     0. 14084.  5178.  5013.  2407. 14344. 15024.  7688. 34095.\n",
      "  4064.  4386.  7298.  1409.  3674.  1055.  3464.  2050.  2176.   594.\n",
      " 20051.  6849.  4101.  1111.  8614.  3411.  2597. 25236.  4650.  9386.\n",
      "  2463.  3103. 10605.  2964.  3325.  2580.  3471.  4865. 99999.  6514.\n",
      "  1471.  2329.  2105.  2885. 25124. 10520.  2202.  2961. 27828.  6767.\n",
      "  2228.  1506. 13550.  2635.  5556.  4787.  3781.  3137.  3818.  3942.\n",
      "   914.   401.  2829.  2977.  4934.  2062.  2354.  5455. 15020.  1424.\n",
      "  3273. 22040.  4416.  3908. 10566.   991.  4931.  1086.  7430.  6497.\n",
      "   114.  7896.  2346.  3418.  3432.  2907.  1151.  2414.  2290. 15831.\n",
      " 41310.  4508.  2538.  3456.  6418.  1848.  3887.  5721.  9562.  1455.\n",
      "  2036.  1831. 11678.  2936.  2993.  7443.  6360.  1797.  1173.  4687.\n",
      "  6723.  2009.  6097.  2653.  1639. 18481.  7978.  2387.  5060.]\n",
      "[   0. 2042. 1408. 1902. 1573. 1887. 1719. 1762. 1564. 2179. 1816. 1980.\n",
      " 1977. 1876. 1340. 2206. 1741. 1485. 2339. 2415. 1380. 1721. 2051. 2377.\n",
      " 1669. 2352. 1672.  653. 2392. 1504. 2001. 1590. 1651. 1628. 1848. 1740.\n",
      " 2002. 1579. 2258. 1602.  419. 2547. 2174. 2205. 1726. 2444. 1138. 2238.\n",
      "  625.  213. 1539.  880. 1668. 1092. 1594. 3004. 2231. 1844.  810. 2824.\n",
      " 2559. 2057. 1974.  974. 2149. 1825. 1735. 1258. 2129. 2603. 2282.  323.\n",
      " 4356. 2246. 1617. 1648. 2489. 3770. 1755. 3683. 2267. 2080. 2457.  155.\n",
      " 3900. 2201. 1944. 2467. 2163. 2754. 2472. 1411.]\n",
      "[40. 13. 16. 45. 50. 80. 30. 35. 60. 20. 52. 44. 15. 25. 38. 43. 55. 48.\n",
      " 58. 32. 70.  2. 22. 56. 41. 28. 36. 24. 46. 42. 12. 65.  1. 10. 34. 75.\n",
      " 98. 33. 54.  8.  6. 64. 19. 18. 72.  5.  9. 47. 37. 21. 26. 14.  4. 59.\n",
      "  7. 99. 53. 39. 62. 57. 78. 90. 66. 11. 49. 84.  3. 17. 68. 27. 85. 31.\n",
      " 51. 77. 63. 23. 87. 88. 73. 89. 97. 94. 29. 96. 67. 82. 86. 91. 81. 76.\n",
      " 92. 61. 74. 95.]\n"
     ]
    }
   ],
   "source": [
    "# Show the distinct values of every column in df_1, one array per column\n",
    "for column_name in df_1.columns:\n",
    "    print(df_1[column_name].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a3c7b9c-4c78-4f91-a683-f911daa18ace",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "a952319b-0c9a-4a1c-92c6-0fddb5b27fbb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30725, 11)"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "0c35716d-131a-4a79-9cdc-ed5644f3f618",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30725, 10)"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature matrix: every column of the training split except 'workclass'\n",
    "Xtrain = train.loc[:, ~train.columns.isin(['workclass'])]\n",
    "Xtrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "4c4de4d4-3e34-443a-94cd-1d95a1a15d32",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3, 4, 5, 6, 7], dtype=object)"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the workclass column by label rather than by position, so the cell\n",
    "# does not silently pick a different column if the column order changes.\n",
    "# NOTE(review): despite the name `Xtest`, this is the workclass column of the\n",
    "# *training* split (i.e. the training target); the name is kept unchanged in\n",
    "# case later cells refer to it - confirm and consider renaming.\n",
    "Xtest = train['workclass']\n",
    "Xtest.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e79a41f-1287-4f9d-9af8-ef669cddd6ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test set: rows where workclass is ' ?' but no other column has a missing value\n",
    "#condition_4 = df.workclass == ' ?'\n",
    "#train_1 = df_1.loc[condition_4 & condition_2 & condition_3]\n",
    "#train_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "567d613c-844d-47a5-b8aa-b249b7d09aab",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#df_1.loc[condition_4 & condition_2]\n",
    "# Whenever workclass is missing, occupation is missing as well"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "99113f31-8af9-4c5f-865b-d45d450ae42c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Drop the occupation column.\n",
    "# Use the `columns=` keyword: passing axis positionally (`drop('occupation', 1)`)\n",
    "# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.\n",
    "df_2 = df_1.drop(columns='occupation')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "a29787cc-756f-43f5-865e-68e9e58acc8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>7</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>6</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>4</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>4</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  workclass  education  marital-status  relationship  race  sex  \\\n",
       "0  39.0          7        9.0             4.0           1.0   4.0  1.0   \n",
       "1  50.0          6        9.0             2.0           0.0   4.0  1.0   \n",
       "2  38.0          4       11.0             0.0           1.0   4.0  1.0   \n",
       "3  53.0          4        1.0             2.0           0.0   2.0  1.0   \n",
       "4  28.0          4        9.0             2.0           5.0   2.0  0.0   \n",
       "\n",
       "   capital-gain  capital-loss  hours-per-week  \n",
       "0        2174.0           0.0            40.0  \n",
       "1           0.0           0.0            13.0  \n",
       "2           0.0           0.0            40.0  \n",
       "3           0.0           0.0            40.0  \n",
       "4           0.0           0.0            40.0  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#df_2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "c4ad2d57-2714-4fd2-9443-467dff3fc55c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>7</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>6</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>4</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>4</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  workclass  education  marital-status  relationship  race  sex  \\\n",
       "0  39.0          7        9.0             4.0           1.0   4.0  1.0   \n",
       "1  50.0          6        9.0             2.0           0.0   4.0  1.0   \n",
       "2  38.0          4       11.0             0.0           1.0   4.0  1.0   \n",
       "3  53.0          4        1.0             2.0           0.0   2.0  1.0   \n",
       "4  28.0          4        9.0             2.0           5.0   2.0  0.0   \n",
       "\n",
       "   capital-gain  capital-loss  hours-per-week  \n",
       "0        2174.0           0.0            40.0  \n",
       "1           0.0           0.0            13.0  \n",
       "2           0.0           0.0            40.0  \n",
       "3           0.0           0.0            40.0  \n",
       "4           0.0           0.0            40.0  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Training set: rows where workclass, occupation and native-country do not contain '?'\n",
    "#condition_1 = df.workclass != ' ?'\n",
    "#condition_2 = df.occupation != ' ?'\n",
    "#condition_3 = df['native-country'] != ' ?'\n",
    "#train = df_2.loc[condition_1 & condition_2 & condition_3]  # no remaining row contains '?'\n",
    "#train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33658116-b25b-4fdc-a96c-34777942004e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "156790e3-028c-44c9-8da5-111680e49cb4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30725, 11)"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "a4da7370-8e53-4ae1-8c6a-262bd4ed910a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30720</th>\n",
       "      <td>27.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30721</th>\n",
       "      <td>40.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30722</th>\n",
       "      <td>58.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30723</th>\n",
       "      <td>22.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30724</th>\n",
       "      <td>52.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30725 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age  education  marital-status  occupation  relationship  race  sex  \\\n",
       "0      39.0        9.0             4.0         1.0           1.0   4.0  1.0   \n",
       "1      50.0        9.0             2.0         4.0           0.0   4.0  1.0   \n",
       "2      38.0       11.0             0.0         6.0           1.0   4.0  1.0   \n",
       "3      53.0        1.0             2.0         6.0           0.0   2.0  1.0   \n",
       "4      28.0        9.0             2.0        10.0           5.0   2.0  0.0   \n",
       "...     ...        ...             ...         ...           ...   ...  ...   \n",
       "30720  27.0        7.0             2.0        13.0           5.0   4.0  0.0   \n",
       "30721  40.0       11.0             2.0         7.0           0.0   4.0  1.0   \n",
       "30722  58.0       11.0             6.0         1.0           4.0   4.0  0.0   \n",
       "30723  22.0       11.0             4.0         1.0           3.0   4.0  1.0   \n",
       "30724  52.0       11.0             2.0         4.0           5.0   4.0  0.0   \n",
       "\n",
       "       capital-gain  capital-loss  hours-per-week  \n",
       "0            2174.0           0.0            40.0  \n",
       "1               0.0           0.0            13.0  \n",
       "2               0.0           0.0            40.0  \n",
       "3               0.0           0.0            40.0  \n",
       "4               0.0           0.0            40.0  \n",
       "...             ...           ...             ...  \n",
       "30720           0.0           0.0            38.0  \n",
       "30721           0.0           0.0            40.0  \n",
       "30722           0.0           0.0            40.0  \n",
       "30723           0.0           0.0            20.0  \n",
       "30724       15024.0           0.0            40.0  \n",
       "\n",
       "[30725 rows x 10 columns]"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Training features: every column of `train` except the 'workclass' target.\n",
    "Xtrain = train.drop(columns='workclass', errors='ignore')\n",
    "Xtrain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "b3a79128-cc2a-458f-a5b6-231da123a7bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training target: the 'workclass' column, selected by name so it is always the\n",
    "# same column that Xtrain leaves out (a positional index would silently pick a\n",
    "# different column if the column order ever changed), then cast to int codes.\n",
    "Ytrain = train['workclass'].astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "e367f6a7-f3d5-4ca5-98b6-71841b641762",
   "metadata": {},
   "outputs": [],
   "source": [
    "#train_2 = df_2.loc[condition_4 & condition_3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "2d7cd7d7-87a8-4986-be4b-9d264d51e883",
   "metadata": {},
   "outputs": [],
   "source": [
    "#train_2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "acbdc45c-2f99-4969-b630-14106a74bbb5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        ?\n",
       "1        ?\n",
       "2        ?\n",
       "3        ?\n",
       "4        ?\n",
       "        ..\n",
       "1831     ?\n",
       "1832     ?\n",
       "1833     ?\n",
       "1834     ?\n",
       "1835     ?\n",
       "Name: workclass, Length: 1836, dtype: object"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 'workclass' column of `test`, selected by name rather than by position so it\n",
    "# stays in step with the column that Xtest excludes.\n",
    "# NOTE(review): in the saved output every value is the '?' placeholder, so these\n",
    "# appear to be the unknown entries to be predicted, not ground-truth labels.\n",
    "Ytest = test['workclass']\n",
    "Ytest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "c36d1562-bcfa-4638-a209-1b26b8fc87c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>54.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>32.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>25.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>67.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>34095.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  education  marital-status  occupation  relationship  race  sex  \\\n",
       "0  54.0       15.0             2.0         0.0           0.0   1.0  1.0   \n",
       "1  32.0        5.0             3.0         0.0           1.0   4.0  1.0   \n",
       "2  25.0       15.0             4.0         0.0           3.0   4.0  1.0   \n",
       "3  67.0        0.0             2.0         0.0           0.0   4.0  1.0   \n",
       "4  17.0        0.0             4.0         0.0           3.0   4.0  0.0   \n",
       "\n",
       "   capital-gain  capital-loss  hours-per-week  \n",
       "0           0.0           0.0            60.0  \n",
       "1           0.0           0.0            40.0  \n",
       "2           0.0           0.0            40.0  \n",
       "3           0.0           0.0             2.0  \n",
       "4       34095.0           0.0            32.0  "
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Features of the rows to predict: every column of `test` except 'workclass'.\n",
    "Xtest = test.drop(columns='workclass', errors='ignore')\n",
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d35f74f2-ba03-41ed-8b75-982901a82471",
   "metadata": {},
   "source": [
    "## 训练集和测试集已划分完毕，导入决策树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "e9adcf6e-d8eb-47af-8a89-be21adab8226",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "c69b216f-ff5a-48a6-a7a9-75c38db08cad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the decision-tree classifier. random_state is fixed because the\n",
    "# tree permutes the features at every split, so ties between equally good\n",
    "# splits would otherwise make the fitted model differ from run to run.\n",
    "clf = tree.DecisionTreeClassifier(random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "8940d5b1-cf82-4f4e-a3d4-c90e4d988a95",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the classifier on the training features and their workclass codes.\n",
    "# The result of fit() is bound back to clf, which also keeps the cell silent.\n",
    "clf = clf.fit(X=Xtrain, y=Ytrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "f49753af-1e5a-4d9c-9959-8d378bb2c6b7",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 4,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 0,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 5,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 1,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 0,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 3,\n",
       " 4,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 3,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 3,\n",
       " 0,\n",
       " 4,\n",
       " 2,\n",
       " 0,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 1,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 4,\n",
       " 4,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 0,\n",
       " 4,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 1,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 4,\n",
       " 5,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 1,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 0,\n",
       " 6,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 0,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 3,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 7,\n",
       " ...]"
      ]
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Workclass codes predicted by the decision tree for the Xtest rows,\n",
    "# converted from the returned array to a plain Python list.\n",
    "li_1 = clf.predict(X=Xtest).tolist()\n",
    "li_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "fa2cd274-3199-41c3-9b9d-f705c496050e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[' State-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Without-pay',\n",
       " ' Never-worked']"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the workclass names held in label_1 (defined elsewhere in the notebook).\n",
    "label_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "bc04a019-f4f7-4b93-8420-1736669c1f59",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Without-pay',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' State-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' State-gov',\n",
       " ' Without-pay',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ...]"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up every element of li_1 in label_1 and collect the results, in order.\n",
    "# NOTE(review): presumably li_1 holds encoded values and label_1 maps them back\n",
    "# to their original labels - confirm against the cells that define them.\n",
    "# A comprehension is used so no loop variable is left behind in the kernel namespace.\n",
    "li_2 = [label_1[code] for code in li_1]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "98fe009d-c1bb-4bc0-b34b-6e15051c6165",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Without-pay',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' State-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Self-emp-not-inc',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' State-gov',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' State-gov',\n",
       " ' Without-pay',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Local-gov',\n",
       " ' State-gov',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Self-emp-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Self-emp-inc',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Local-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Self-emp-not-inc',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' State-gov',\n",
       " ' Private',\n",
       " ' Private',\n",
       " ' Federal-gov',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Never-worked',\n",
       " ' Private',\n",
       " ' Self-emp-not-inc',\n",
       " ' Never-worked',\n",
       " ...]"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "li_2 # 将数值型转化为原来的变量了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "21aab819-60f4-4b33-9e6a-e9f681e735b1",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[27,\n",
       " 61,\n",
       " 69,\n",
       " 77,\n",
       " 106,\n",
       " 128,\n",
       " 149,\n",
       " 154,\n",
       " 160,\n",
       " 187,\n",
       " 201,\n",
       " 221,\n",
       " 226,\n",
       " 243,\n",
       " 266,\n",
       " 297,\n",
       " 312,\n",
       " 326,\n",
       " 346,\n",
       " 347,\n",
       " 354,\n",
       " 397,\n",
       " 408,\n",
       " 430,\n",
       " 431,\n",
       " 449,\n",
       " 459,\n",
       " 471,\n",
       " 484,\n",
       " 486,\n",
       " 499,\n",
       " 511,\n",
       " 515,\n",
       " 517,\n",
       " 518,\n",
       " 539,\n",
       " 576,\n",
       " 580,\n",
       " 590,\n",
       " 591,\n",
       " 596,\n",
       " 648,\n",
       " 656,\n",
       " 668,\n",
       " 671,\n",
       " 686,\n",
       " 695,\n",
       " 734,\n",
       " 788,\n",
       " 789,\n",
       " 792,\n",
       " 806,\n",
       " 811,\n",
       " 830,\n",
       " 834,\n",
       " 885,\n",
       " 890,\n",
       " 903,\n",
       " 924,\n",
       " 931,\n",
       " 969,\n",
       " 982,\n",
       " 1019,\n",
       " 1034,\n",
       " 1035,\n",
       " 1039,\n",
       " 1046,\n",
       " 1097,\n",
       " 1100,\n",
       " 1131,\n",
       " 1134,\n",
       " 1152,\n",
       " 1167,\n",
       " 1175,\n",
       " 1180,\n",
       " 1185,\n",
       " 1208,\n",
       " 1215,\n",
       " 1217,\n",
       " 1262,\n",
       " 1282,\n",
       " 1290,\n",
       " 1312,\n",
       " 1317,\n",
       " 1325,\n",
       " 1347,\n",
       " 1349,\n",
       " 1371,\n",
       " 1388,\n",
       " 1404,\n",
       " 1420,\n",
       " 1433,\n",
       " 1441,\n",
       " 1452,\n",
       " 1458,\n",
       " 1504,\n",
       " 1544,\n",
       " 1562,\n",
       " 1569,\n",
       " 1570,\n",
       " 1574,\n",
       " 1579,\n",
       " 1592,\n",
       " 1606,\n",
       " 1629,\n",
       " 1656,\n",
       " 1665,\n",
       " 1676,\n",
       " 1704,\n",
       " 1707,\n",
       " 1758,\n",
       " 1761,\n",
       " 1773,\n",
       " 1778,\n",
       " 1823,\n",
       " 1846,\n",
       " 1851,\n",
       " 1865,\n",
       " 1878,\n",
       " 1923,\n",
       " 1931,\n",
       " 1971,\n",
       " 1987,\n",
       " 2025,\n",
       " 2036,\n",
       " 2046,\n",
       " 2061,\n",
       " 2072,\n",
       " 2084,\n",
       " 2091,\n",
       " 2094,\n",
       " 2105,\n",
       " 2118,\n",
       " 2126,\n",
       " 2152,\n",
       " 2155,\n",
       " 2163,\n",
       " 2164,\n",
       " 2210,\n",
       " 2213,\n",
       " 2222,\n",
       " 2281,\n",
       " 2292,\n",
       " 2323,\n",
       " 2327,\n",
       " 2340,\n",
       " 2354,\n",
       " 2356,\n",
       " 2358,\n",
       " 2359,\n",
       " 2372,\n",
       " 2380,\n",
       " 2382,\n",
       " 2397,\n",
       " 2420,\n",
       " 2427,\n",
       " 2464,\n",
       " 2476,\n",
       " 2486,\n",
       " 2491,\n",
       " 2496,\n",
       " 2506,\n",
       " 2513,\n",
       " 2521,\n",
       " 2544,\n",
       " 2566,\n",
       " 2570,\n",
       " 2571,\n",
       " 2578,\n",
       " 2586,\n",
       " 2594,\n",
       " 2606,\n",
       " 2632,\n",
       " 2634,\n",
       " 2638,\n",
       " 2673,\n",
       " 2676,\n",
       " 2689,\n",
       " 2751,\n",
       " 2760,\n",
       " 2761,\n",
       " 2847,\n",
       " 2856,\n",
       " 2857,\n",
       " 2858,\n",
       " 2885,\n",
       " 2930,\n",
       " 2932,\n",
       " 2947,\n",
       " 2952,\n",
       " 2961,\n",
       " 2999,\n",
       " 3005,\n",
       " 3033,\n",
       " 3042,\n",
       " 3065,\n",
       " 3072,\n",
       " 3088,\n",
       " 3096,\n",
       " 3119,\n",
       " 3131,\n",
       " 3146,\n",
       " 3207,\n",
       " 3211,\n",
       " 3228,\n",
       " 3231,\n",
       " 3239,\n",
       " 3255,\n",
       " 3269,\n",
       " 3291,\n",
       " 3297,\n",
       " 3330,\n",
       " 3338,\n",
       " 3351,\n",
       " 3371,\n",
       " 3387,\n",
       " 3402,\n",
       " 3439,\n",
       " 3453,\n",
       " 3456,\n",
       " 3459,\n",
       " 3486,\n",
       " 3516,\n",
       " 3531,\n",
       " 3555,\n",
       " 3572,\n",
       " 3573,\n",
       " 3579,\n",
       " 3588,\n",
       " 3592,\n",
       " 3594,\n",
       " 3597,\n",
       " 3631,\n",
       " 3670,\n",
       " 3703,\n",
       " 3725,\n",
       " 3736,\n",
       " 3744,\n",
       " 3747,\n",
       " 3759,\n",
       " 3773,\n",
       " 3775,\n",
       " 3805,\n",
       " 3822,\n",
       " 3834,\n",
       " 3843,\n",
       " 3851,\n",
       " 3863,\n",
       " 3887,\n",
       " 3895,\n",
       " 3897,\n",
       " 3901,\n",
       " 3902,\n",
       " 3916,\n",
       " 3942,\n",
       " 3947,\n",
       " 3949,\n",
       " 3950,\n",
       " 3963,\n",
       " 3969,\n",
       " 3980,\n",
       " 3990,\n",
       " 4003,\n",
       " 4017,\n",
       " 4018,\n",
       " 4021,\n",
       " 4072,\n",
       " 4080,\n",
       " 4086,\n",
       " 4090,\n",
       " 4109,\n",
       " 4147,\n",
       " 4152,\n",
       " 4155,\n",
       " 4169,\n",
       " 4174,\n",
       " 4200,\n",
       " 4212,\n",
       " 4215,\n",
       " 4217,\n",
       " 4240,\n",
       " 4272,\n",
       " 4288,\n",
       " 4299,\n",
       " 4309,\n",
       " 4315,\n",
       " 4337,\n",
       " 4369,\n",
       " 4393,\n",
       " 4409,\n",
       " 4413,\n",
       " 4422,\n",
       " 4435,\n",
       " 4437,\n",
       " 4460,\n",
       " 4464,\n",
       " 4499,\n",
       " 4500,\n",
       " 4519,\n",
       " 4531,\n",
       " 4548,\n",
       " 4553,\n",
       " 4592,\n",
       " 4606,\n",
       " 4607,\n",
       " 4613,\n",
       " 4621,\n",
       " 4627,\n",
       " 4655,\n",
       " 4685,\n",
       " 4688,\n",
       " 4721,\n",
       " 4729,\n",
       " 4745,\n",
       " 4752,\n",
       " 4755,\n",
       " 4766,\n",
       " 4778,\n",
       " 4782,\n",
       " 4801,\n",
       " 4812,\n",
       " 4818,\n",
       " 4835,\n",
       " 4838,\n",
       " 4884,\n",
       " 4925,\n",
       " 4942,\n",
       " 4958,\n",
       " 4970,\n",
       " 4980,\n",
       " 4982,\n",
       " 5020,\n",
       " 5064,\n",
       " 5147,\n",
       " 5156,\n",
       " 5172,\n",
       " 5193,\n",
       " 5198,\n",
       " 5207,\n",
       " 5209,\n",
       " 5214,\n",
       " 5228,\n",
       " 5255,\n",
       " 5294,\n",
       " 5296,\n",
       " 5302,\n",
       " 5307,\n",
       " 5320,\n",
       " 5321,\n",
       " 5340,\n",
       " 5344,\n",
       " 5346,\n",
       " 5383,\n",
       " 5384,\n",
       " 5440,\n",
       " 5446,\n",
       " 5471,\n",
       " 5492,\n",
       " 5526,\n",
       " 5529,\n",
       " 5548,\n",
       " 5561,\n",
       " 5565,\n",
       " 5590,\n",
       " 5623,\n",
       " 5632,\n",
       " 5652,\n",
       " 5679,\n",
       " 5687,\n",
       " 5720,\n",
       " 5753,\n",
       " 5766,\n",
       " 5788,\n",
       " 5803,\n",
       " 5808,\n",
       " 5832,\n",
       " 5853,\n",
       " 5916,\n",
       " 5921,\n",
       " 5928,\n",
       " 5977,\n",
       " 5983,\n",
       " 6015,\n",
       " 6039,\n",
       " 6051,\n",
       " 6059,\n",
       " 6131,\n",
       " 6178,\n",
       " 6231,\n",
       " 6284,\n",
       " 6285,\n",
       " 6314,\n",
       " 6342,\n",
       " 6351,\n",
       " 6407,\n",
       " 6432,\n",
       " 6448,\n",
       " 6510,\n",
       " 6521,\n",
       " 6536,\n",
       " 6542,\n",
       " 6549,\n",
       " 6558,\n",
       " 6564,\n",
       " 6591,\n",
       " 6640,\n",
       " 6646,\n",
       " 6663,\n",
       " 6679,\n",
       " 6733,\n",
       " 6734,\n",
       " 6753,\n",
       " 6766,\n",
       " 6798,\n",
       " 6834,\n",
       " 6860,\n",
       " 6862,\n",
       " 6877,\n",
       " 6896,\n",
       " 6914,\n",
       " 6935,\n",
       " 6948,\n",
       " 6993,\n",
       " 6995,\n",
       " 7011,\n",
       " 7027,\n",
       " 7049,\n",
       " 7075,\n",
       " 7100,\n",
       " 7102,\n",
       " 7106,\n",
       " 7136,\n",
       " 7149,\n",
       " 7164,\n",
       " 7167,\n",
       " 7173,\n",
       " 7193,\n",
       " 7291,\n",
       " 7301,\n",
       " 7322,\n",
       " 7340,\n",
       " 7352,\n",
       " 7437,\n",
       " 7457,\n",
       " 7463,\n",
       " 7472,\n",
       " 7510,\n",
       " 7554,\n",
       " 7559,\n",
       " 7560,\n",
       " 7576,\n",
       " 7579,\n",
       " 7584,\n",
       " 7611,\n",
       " 7663,\n",
       " 7683,\n",
       " 7724,\n",
       " 7740,\n",
       " 7746,\n",
       " 7750,\n",
       " 7763,\n",
       " 7773,\n",
       " 7784,\n",
       " 7787,\n",
       " 7815,\n",
       " 7826,\n",
       " 7839,\n",
       " 7862,\n",
       " 7872,\n",
       " 7876,\n",
       " 7900,\n",
       " 7905,\n",
       " 7942,\n",
       " 7963,\n",
       " 7971,\n",
       " 7977,\n",
       " 7999,\n",
       " 8002,\n",
       " 8007,\n",
       " 8022,\n",
       " 8042,\n",
       " 8053,\n",
       " 8057,\n",
       " 8069,\n",
       " 8085,\n",
       " 8088,\n",
       " 8096,\n",
       " 8098,\n",
       " 8100,\n",
       " 8134,\n",
       " 8147,\n",
       " 8168,\n",
       " 8189,\n",
       " 8193,\n",
       " 8222,\n",
       " 8241,\n",
       " 8297,\n",
       " 8322,\n",
       " 8364,\n",
       " 8387,\n",
       " 8429,\n",
       " 8446,\n",
       " 8447,\n",
       " 8472,\n",
       " 8499,\n",
       " 8532,\n",
       " 8543,\n",
       " 8565,\n",
       " 8607,\n",
       " 8636,\n",
       " 8643,\n",
       " 8673,\n",
       " 8692,\n",
       " 8694,\n",
       " 8749,\n",
       " 8757,\n",
       " 8764,\n",
       " 8769,\n",
       " 8782,\n",
       " 8788,\n",
       " 8795,\n",
       " 8805,\n",
       " 8822,\n",
       " 8847,\n",
       " 8853,\n",
       " 8908,\n",
       " 8920,\n",
       " 8940,\n",
       " 8949,\n",
       " 8954,\n",
       " 8963,\n",
       " 8985,\n",
       " 8991,\n",
       " 8996,\n",
       " 9028,\n",
       " 9030,\n",
       " 9106,\n",
       " 9138,\n",
       " 9140,\n",
       " 9141,\n",
       " 9147,\n",
       " 9148,\n",
       " 9155,\n",
       " 9170,\n",
       " 9178,\n",
       " 9197,\n",
       " 9211,\n",
       " 9214,\n",
       " 9245,\n",
       " 9293,\n",
       " 9324,\n",
       " 9340,\n",
       " 9342,\n",
       " 9351,\n",
       " 9353,\n",
       " 9358,\n",
       " 9367,\n",
       " 9409,\n",
       " 9452,\n",
       " 9477,\n",
       " 9484,\n",
       " 9490,\n",
       " 9500,\n",
       " 9531,\n",
       " 9536,\n",
       " 9549,\n",
       " 9557,\n",
       " 9577,\n",
       " 9582,\n",
       " 9616,\n",
       " 9626,\n",
       " 9650,\n",
       " 9703,\n",
       " 9708,\n",
       " 9712,\n",
       " 9778,\n",
       " 9787,\n",
       " 9860,\n",
       " 9872,\n",
       " 9885,\n",
       " 9907,\n",
       " 9926,\n",
       " 9927,\n",
       " 9938,\n",
       " 9987,\n",
       " 10013,\n",
       " 10015,\n",
       " 10016,\n",
       " 10035,\n",
       " 10056,\n",
       " 10064,\n",
       " 10094,\n",
       " 10098,\n",
       " 10102,\n",
       " 10110,\n",
       " 10117,\n",
       " 10126,\n",
       " 10139,\n",
       " 10143,\n",
       " 10161,\n",
       " 10222,\n",
       " 10232,\n",
       " 10253,\n",
       " 10328,\n",
       " 10342,\n",
       " 10361,\n",
       " 10411,\n",
       " 10425,\n",
       " 10437,\n",
       " 10440,\n",
       " 10460,\n",
       " 10475,\n",
       " 10485,\n",
       " 10486,\n",
       " 10539,\n",
       " 10546,\n",
       " 10570,\n",
       " 10581,\n",
       " 10673,\n",
       " 10679,\n",
       " 10681,\n",
       " 10683,\n",
       " 10684,\n",
       " 10700,\n",
       " 10704,\n",
       " 10709,\n",
       " 10715,\n",
       " 10718,\n",
       " 10745,\n",
       " 10746,\n",
       " 10784,\n",
       " 10805,\n",
       " 10820,\n",
       " 10828,\n",
       " 10837,\n",
       " 10856,\n",
       " 10881,\n",
       " 10931,\n",
       " 10932,\n",
       " 10955,\n",
       " 10959,\n",
       " 10995,\n",
       " 11001,\n",
       " 11027,\n",
       " 11039,\n",
       " 11046,\n",
       " 11056,\n",
       " 11059,\n",
       " 11085,\n",
       " 11087,\n",
       " 11099,\n",
       " 11158,\n",
       " 11159,\n",
       " 11164,\n",
       " 11192,\n",
       " 11198,\n",
       " 11217,\n",
       " 11228,\n",
       " 11234,\n",
       " 11261,\n",
       " 11286,\n",
       " 11294,\n",
       " 11316,\n",
       " 11328,\n",
       " 11334,\n",
       " 11340,\n",
       " 11346,\n",
       " 11355,\n",
       " 11391,\n",
       " 11413,\n",
       " 11421,\n",
       " 11430,\n",
       " 11460,\n",
       " 11474,\n",
       " 11484,\n",
       " 11500,\n",
       " 11516,\n",
       " 11526,\n",
       " 11532,\n",
       " 11544,\n",
       " 11550,\n",
       " 11573,\n",
       " 11578,\n",
       " 11580,\n",
       " 11591,\n",
       " 11614,\n",
       " 11621,\n",
       " 11658,\n",
       " 11688,\n",
       " 11691,\n",
       " 11713,\n",
       " 11731,\n",
       " 11732,\n",
       " 11734,\n",
       " 11768,\n",
       " 11770,\n",
       " 11773,\n",
       " 11793,\n",
       " 11851,\n",
       " 11864,\n",
       " 11938,\n",
       " 12007,\n",
       " 12008,\n",
       " 12020,\n",
       " 12029,\n",
       " 12068,\n",
       " 12093,\n",
       " 12098,\n",
       " 12130,\n",
       " 12153,\n",
       " 12161,\n",
       " 12175,\n",
       " 12198,\n",
       " 12213,\n",
       " 12214,\n",
       " 12218,\n",
       " 12253,\n",
       " 12299,\n",
       " 12326,\n",
       " 12334,\n",
       " 12351,\n",
       " 12373,\n",
       " 12377,\n",
       " 12405,\n",
       " 12411,\n",
       " 12428,\n",
       " 12438,\n",
       " 12451,\n",
       " 12491,\n",
       " 12492,\n",
       " 12543,\n",
       " 12553,\n",
       " 12589,\n",
       " 12609,\n",
       " 12627,\n",
       " 12651,\n",
       " 12667,\n",
       " 12784,\n",
       " 12794,\n",
       " 12849,\n",
       " 12853,\n",
       " 12858,\n",
       " 12908,\n",
       " 12918,\n",
       " 12919,\n",
       " 12936,\n",
       " 12981,\n",
       " 12991,\n",
       " 12996,\n",
       " 13025,\n",
       " 13026,\n",
       " 13042,\n",
       " 13043,\n",
       " 13065,\n",
       " 13069,\n",
       " 13074,\n",
       " 13095,\n",
       " 13110,\n",
       " 13153,\n",
       " 13161,\n",
       " 13177,\n",
       " 13181,\n",
       " 13182,\n",
       " 13203,\n",
       " 13234,\n",
       " 13290,\n",
       " 13302,\n",
       " 13321,\n",
       " 13347,\n",
       " 13365,\n",
       " 13370,\n",
       " 13384,\n",
       " 13447,\n",
       " 13493,\n",
       " 13498,\n",
       " 13504,\n",
       " 13515,\n",
       " 13525,\n",
       " 13534,\n",
       " 13552,\n",
       " 13556,\n",
       " 13569,\n",
       " 13586,\n",
       " 13608,\n",
       " 13626,\n",
       " 13646,\n",
       " 13711,\n",
       " 13744,\n",
       " 13784,\n",
       " 13815,\n",
       " 13843,\n",
       " 13845,\n",
       " 13884,\n",
       " 13933,\n",
       " 13949,\n",
       " 14005,\n",
       " 14053,\n",
       " 14067,\n",
       " 14108,\n",
       " 14118,\n",
       " 14123,\n",
       " 14151,\n",
       " 14178,\n",
       " 14203,\n",
       " 14213,\n",
       " 14217,\n",
       " 14254,\n",
       " 14255,\n",
       " 14280,\n",
       " 14330,\n",
       " 14344,\n",
       " 14348,\n",
       " 14360,\n",
       " 14363,\n",
       " 14370,\n",
       " 14398,\n",
       " 14418,\n",
       " 14430,\n",
       " 14441,\n",
       " 14499,\n",
       " 14534,\n",
       " 14535,\n",
       " 14536,\n",
       " 14541,\n",
       " 14548,\n",
       " 14571,\n",
       " 14574,\n",
       " 14578,\n",
       " 14602,\n",
       " 14618,\n",
       " 14646,\n",
       " 14672,\n",
       " 14688,\n",
       " 14692,\n",
       " 14717,\n",
       " 14718,\n",
       " 14725,\n",
       " 14742,\n",
       " 14746,\n",
       " 14859,\n",
       " 14860,\n",
       " 14870,\n",
       " 14887,\n",
       " 14911,\n",
       " 14939,\n",
       " 14945,\n",
       " 14980,\n",
       " 14982,\n",
       " 15012,\n",
       " 15022,\n",
       " 15033,\n",
       " 15064,\n",
       " 15069,\n",
       " 15130,\n",
       " 15176,\n",
       " 15192,\n",
       " 15220,\n",
       " 15238,\n",
       " 15256,\n",
       " 15266,\n",
       " 15286,\n",
       " 15292,\n",
       " 15309,\n",
       " 15310,\n",
       " 15350,\n",
       " 15414,\n",
       " 15424,\n",
       " 15426,\n",
       " 15464,\n",
       " 15471,\n",
       " 15476,\n",
       " 15484,\n",
       " 15499,\n",
       " 15523,\n",
       " 15532,\n",
       " 15542,\n",
       " 15547,\n",
       " 15579,\n",
       " 15580,\n",
       " 15584,\n",
       " 15596,\n",
       " 15598,\n",
       " 15616,\n",
       " 15643,\n",
       " 15674,\n",
       " 15685,\n",
       " 15696,\n",
       " 15743,\n",
       " 15773,\n",
       " 15778,\n",
       " 15782,\n",
       " 15846,\n",
       " 15860,\n",
       " 15871,\n",
       " 15911,\n",
       " 15960,\n",
       " 15999,\n",
       " 16004,\n",
       " 16019,\n",
       " 16063,\n",
       " 16065,\n",
       " 16082,\n",
       " 16103,\n",
       " 16117,\n",
       " 16123,\n",
       " 16130,\n",
       " 16136,\n",
       " 16146,\n",
       " 16151,\n",
       " 16155,\n",
       " 16173,\n",
       " 16179,\n",
       " 16185,\n",
       " 16196,\n",
       " 16212,\n",
       " 16221,\n",
       " 16292,\n",
       " 16294,\n",
       " 16346,\n",
       " 16379,\n",
       " 16382,\n",
       " 16399,\n",
       " 16404,\n",
       " 16410,\n",
       " 16454,\n",
       " 16456,\n",
       " 16488,\n",
       " 16490,\n",
       " 16515,\n",
       " 16523,\n",
       " 16535,\n",
       " 16566,\n",
       " 16583,\n",
       " 16595,\n",
       " 16602,\n",
       " 16642,\n",
       " 16659,\n",
       " 16679,\n",
       " 16725,\n",
       " 16731,\n",
       " 16743,\n",
       " 16748,\n",
       " 16750,\n",
       " 16755,\n",
       " 16760,\n",
       " 16762,\n",
       " 16794,\n",
       " 16798,\n",
       " 16802,\n",
       " 16810,\n",
       " 16817,\n",
       " 16827,\n",
       " 16835,\n",
       " 16838,\n",
       " 16878,\n",
       " 16907,\n",
       " 16966,\n",
       " 16978,\n",
       " 16984,\n",
       " 17015,\n",
       " 17030,\n",
       " 17039,\n",
       " 17096,\n",
       " 17098,\n",
       " 17132,\n",
       " 17168,\n",
       " 17209,\n",
       " 17247,\n",
       " 17279,\n",
       " 17299,\n",
       " 17314,\n",
       " 17321,\n",
       " 17326,\n",
       " 17347,\n",
       " 17391,\n",
       " 17412,\n",
       " 17414,\n",
       " 17462,\n",
       " 17470,\n",
       " 17505,\n",
       " 17531,\n",
       " 17537,\n",
       " 17587,\n",
       " 17594,\n",
       " 17635,\n",
       " 17643,\n",
       " 17644,\n",
       " 17648,\n",
       " 17708,\n",
       " 17710,\n",
       " 17717,\n",
       " 17723,\n",
       " 17725,\n",
       " 17750,\n",
       " 17752,\n",
       " 17757,\n",
       " 17762,\n",
       " 17773,\n",
       " 17790,\n",
       " 17811,\n",
       " ...]"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index_1 = df.loc[df.workclass == ' ?'].index.tolist()\n",
    "index_1 # indices of the rows whose workclass is the missing-value marker ' ?'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "25c21991-d0ec-47ad-805e-7c447edb16e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "dic = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "fc4c069b-0e7f-4400-9892-b86888f332d0",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{27: ' Private', 61: ' Federal-gov', 69: ' Never-worked', 77: ' Private', 106: ' Never-worked', 128: ' Never-worked', 149: ' Private', 154: ' Federal-gov', 160: ' Local-gov', 187: ' Local-gov', 201: ' Never-worked', 221: ' Private', 226: ' Federal-gov', 243: ' Never-worked', 266: ' Never-worked', 297: ' Federal-gov', 312: ' Never-worked', 326: ' Never-worked', 346: ' Private', 347: ' Never-worked', 354: ' Private', 397: ' Never-worked', 408: ' Never-worked', 430: ' Self-emp-not-inc', 431: ' Never-worked', 449: ' Never-worked', 459: ' Never-worked', 471: ' Never-worked', 484: ' Federal-gov', 486: ' Local-gov', 499: ' Private', 511: ' Private', 515: ' Local-gov', 517: ' Never-worked', 518: ' Never-worked', 539: ' Never-worked', 576: ' Never-worked', 580: ' Local-gov', 590: ' Private', 591: ' State-gov', 596: ' Never-worked', 648: ' Self-emp-inc', 656: ' Local-gov', 668: ' Never-worked', 671: ' Never-worked', 686: ' Private', 695: ' Never-worked', 734: ' Local-gov', 788: ' Private', 789: ' Never-worked', 792: ' Never-worked', 806: ' Local-gov', 811: ' Never-worked', 830: ' Private', 834: ' Federal-gov', 885: ' Federal-gov', 890: ' Private', 903: ' Private', 924: ' Self-emp-not-inc', 931: ' Never-worked', 969: ' Private', 982: ' Never-worked', 1019: ' Never-worked', 1034: ' Private', 1035: ' Never-worked', 1039: ' Local-gov', 1046: ' State-gov', 1097: ' Never-worked', 1100: ' Private', 1131: ' Never-worked', 1134: ' State-gov', 1152: ' Never-worked', 1167: ' Never-worked', 1175: ' Federal-gov', 1180: ' Private', 1185: ' Local-gov', 1208: ' Never-worked', 1215: ' Private', 1217: ' Never-worked', 1262: ' Self-emp-not-inc', 1282: ' Federal-gov', 1290: ' Private', 1312: ' Never-worked', 1317: ' Private', 1325: ' Never-worked', 1347: ' Never-worked', 1349: ' Private', 1371: ' Private', 1388: ' Never-worked', 1404: ' Private', 1420: ' Never-worked', 1433: ' Never-worked', 1441: ' Private', 1452: ' Never-worked', 1458: ' Self-emp-not-inc', 1504: ' Federal-gov', 1544: ' 
Private', 1562: ' Private', 1569: ' Never-worked', 1570: ' Never-worked', 1574: ' Local-gov', 1579: ' Private', 1592: ' Never-worked', 1606: ' Never-worked', 1629: ' Private', 1656: ' Never-worked', 1665: ' Private', 1676: ' Federal-gov', 1704: ' Never-worked', 1707: ' Private', 1758: ' Private', 1761: ' Federal-gov', 1773: ' Never-worked', 1778: ' Never-worked', 1823: ' Never-worked', 1846: ' Never-worked', 1851: ' Local-gov', 1865: ' Federal-gov', 1878: ' Never-worked', 1923: ' Never-worked', 1931: ' Private', 1971: ' Private', 1987: ' Private', 2025: ' Never-worked', 2036: ' Never-worked', 2046: ' Never-worked', 2061: ' Never-worked', 2072: ' Federal-gov', 2084: ' Private', 2091: ' Never-worked', 2094: ' Never-worked', 2105: ' Never-worked', 2118: ' Private', 2126: ' Never-worked', 2152: ' Never-worked', 2155: ' Private', 2163: ' Private', 2164: ' Private', 2210: ' State-gov', 2213: ' Private', 2222: ' Never-worked', 2281: ' Never-worked', 2292: ' Never-worked', 2323: ' Never-worked', 2327: ' Never-worked', 2340: ' Private', 2354: ' Never-worked', 2356: ' Never-worked', 2358: ' Private', 2359: ' Never-worked', 2372: ' Self-emp-not-inc', 2380: ' Never-worked', 2382: ' Private', 2397: ' Never-worked', 2420: ' Federal-gov', 2427: ' Never-worked', 2464: ' Private', 2476: ' Never-worked', 2486: ' Self-emp-not-inc', 2491: ' Federal-gov', 2496: ' Never-worked', 2506: ' Never-worked', 2513: ' Private', 2521: ' Federal-gov', 2544: ' Never-worked', 2566: ' Federal-gov', 2570: ' Private', 2571: ' Never-worked', 2578: ' Private', 2586: ' Never-worked', 2594: ' Private', 2606: ' Never-worked', 2632: ' Private', 2634: ' Private', 2638: ' Local-gov', 2673: ' Never-worked', 2676: ' Local-gov', 2689: ' Never-worked', 2751: ' Private', 2760: ' Private', 2761: ' Private', 2847: ' Never-worked', 2856: ' Federal-gov', 2857: ' Private', 2858: ' Never-worked', 2885: ' Private', 2930: ' Never-worked', 2932: ' Local-gov', 2947: ' Never-worked', 2952: ' Never-worked', 2961: ' 
Without-pay', 2999: ' Private', 3005: ' Never-worked', 3033: ' Private', 3042: ' Private', 3065: ' Private', 3072: ' Never-worked', 3088: ' Never-worked', 3096: ' Never-worked', 3119: ' State-gov', 3131: ' Never-worked', 3146: ' Private', 3207: ' Never-worked', 3211: ' Self-emp-not-inc', 3228: ' Never-worked', 3231: ' Private', 3239: ' Never-worked', 3255: ' Private', 3269: ' Never-worked', 3291: ' Private', 3297: ' Federal-gov', 3330: ' Private', 3338: ' Local-gov', 3351: ' Private', 3371: ' Never-worked', 3387: ' Private', 3402: ' Private', 3439: ' Private', 3453: ' Never-worked', 3456: ' Never-worked', 3459: ' Never-worked', 3486: ' Never-worked', 3516: ' Never-worked', 3531: ' Private', 3555: ' Never-worked', 3572: ' Never-worked', 3573: ' Never-worked', 3579: ' Never-worked', 3588: ' Local-gov', 3592: ' Private', 3594: ' Private', 3597: ' Never-worked', 3631: ' Private', 3670: ' Self-emp-not-inc', 3703: ' Never-worked', 3725: ' Never-worked', 3736: ' State-gov', 3744: ' Never-worked', 3747: ' Never-worked', 3759: ' Private', 3773: ' Private', 3775: ' Never-worked', 3805: ' Never-worked', 3822: ' Local-gov', 3834: ' Private', 3843: ' Never-worked', 3851: ' Never-worked', 3863: ' Private', 3887: ' Self-emp-not-inc', 3895: ' Private', 3897: ' Never-worked', 3901: ' Never-worked', 3902: ' Never-worked', 3916: ' Never-worked', 3942: ' Never-worked', 3947: ' Private', 3949: ' Local-gov', 3950: ' Never-worked', 3963: ' Self-emp-not-inc', 3969: ' Private', 3980: ' Private', 3990: ' Local-gov', 4003: ' Never-worked', 4017: ' Federal-gov', 4018: ' Private', 4021: ' Never-worked', 4072: ' Never-worked', 4080: ' Never-worked', 4086: ' Private', 4090: ' Never-worked', 4109: ' Local-gov', 4147: ' Private', 4152: ' Never-worked', 4155: ' Private', 4169: ' Private', 4174: ' Never-worked', 4200: ' Never-worked', 4212: ' Private', 4215: ' State-gov', 4217: ' Never-worked', 4240: ' Never-worked', 4272: ' Never-worked', 4288: ' Local-gov', 4299: ' Never-worked', 4309: ' Private', 
4315: ' Federal-gov', 4337: ' Private', 4369: ' Never-worked', 4393: ' Never-worked', 4409: ' Never-worked', 4413: ' Never-worked', 4422: ' Federal-gov', 4435: ' Private', 4437: ' Private', 4460: ' Private', 4464: ' Private', 4499: ' Never-worked', 4500: ' Private', 4519: ' Never-worked', 4531: ' Never-worked', 4548: ' State-gov', 4553: ' Never-worked', 4592: ' Never-worked', 4606: ' Private', 4607: ' Never-worked', 4613: ' Self-emp-not-inc', 4621: ' Never-worked', 4627: ' Private', 4655: ' Never-worked', 4685: ' Self-emp-inc', 4688: ' Never-worked', 4721: ' Private', 4729: ' Private', 4745: ' Private', 4752: ' Private', 4755: ' Private', 4766: ' Federal-gov', 4778: ' Self-emp-not-inc', 4782: ' Private', 4801: ' Local-gov', 4812: ' Private', 4818: ' Never-worked', 4835: ' Private', 4838: ' Private', 4884: ' Never-worked', 4925: ' Private', 4942: ' Never-worked', 4958: ' Never-worked', 4970: ' Private', 4980: ' Private', 4982: ' Never-worked', 5020: ' Never-worked', 5064: ' Never-worked', 5147: ' Never-worked', 5156: ' Private', 5172: ' Private', 5193: ' Private', 5198: ' Federal-gov', 5207: ' Private', 5209: ' Never-worked', 5214: ' Private', 5228: ' Local-gov', 5255: ' State-gov', 5294: ' Federal-gov', 5296: ' Never-worked', 5302: ' Never-worked', 5307: ' Private', 5320: ' Never-worked', 5321: ' Never-worked', 5340: ' Private', 5344: ' Private', 5346: ' Never-worked', 5383: ' Never-worked', 5384: ' Never-worked', 5440: ' Private', 5446: ' Never-worked', 5471: ' Private', 5492: ' Private', 5526: ' Never-worked', 5529: ' Private', 5548: ' Private', 5561: ' Private', 5565: ' Never-worked', 5590: ' Self-emp-not-inc', 5623: ' Private', 5632: ' Self-emp-not-inc', 5652: ' State-gov', 5679: ' Self-emp-inc', 5687: ' Local-gov', 5720: ' Never-worked', 5753: ' Private', 5766: ' Self-emp-not-inc', 5788: ' Never-worked', 5803: ' Never-worked', 5808: ' Private', 5832: ' Local-gov', 5853: ' Never-worked', 5916: ' Private', 5921: ' State-gov', 5928: ' Never-worked', 5977: ' 
Never-worked', 5983: ' Never-worked', 6015: ' Private', 6039: ' Private', 6051: ' Private', 6059: ' Private', 6131: ' Never-worked', 6178: ' Self-emp-not-inc', 6231: ' Federal-gov', 6284: ' Private', 6285: ' Never-worked', 6314: ' Never-worked', 6342: ' Local-gov', 6351: ' Never-worked', 6407: ' Private', 6432: ' Federal-gov', 6448: ' Never-worked', 6510: ' Private', 6521: ' Never-worked', 6536: ' Never-worked', 6542: ' Never-worked', 6549: ' Never-worked', 6558: ' Never-worked', 6564: ' Never-worked', 6591: ' Never-worked', 6640: ' Never-worked', 6646: ' State-gov', 6663: ' Never-worked', 6679: ' Private', 6733: ' Never-worked', 6734: ' Never-worked', 6753: ' Private', 6766: ' Federal-gov', 6798: ' Federal-gov', 6834: ' Local-gov', 6860: ' Never-worked', 6862: ' Local-gov', 6877: ' Never-worked', 6896: ' Federal-gov', 6914: ' Federal-gov', 6935: ' Private', 6948: ' Private', 6993: ' Never-worked', 6995: ' Never-worked', 7011: ' Never-worked', 7027: ' Private', 7049: ' Never-worked', 7075: ' Private', 7100: ' Private', 7102: ' Never-worked', 7106: ' Private', 7136: ' Never-worked', 7149: ' Never-worked', 7164: ' Never-worked', 7167: ' Private', 7173: ' Federal-gov', 7193: ' Private', 7291: ' Never-worked', 7301: ' Self-emp-not-inc', 7322: ' Never-worked', 7340: ' Never-worked', 7352: ' Never-worked', 7437: ' Never-worked', 7457: ' Never-worked', 7463: ' Federal-gov', 7472: ' Never-worked', 7510: ' Private', 7554: ' Private', 7559: ' Private', 7560: ' Never-worked', 7576: ' Private', 7579: ' Private', 7584: ' Never-worked', 7611: ' Never-worked', 7663: ' Never-worked', 7683: ' Never-worked', 7724: ' Private', 7740: ' Never-worked', 7746: ' Federal-gov', 7750: ' Never-worked', 7763: ' Federal-gov', 7773: ' Never-worked', 7784: ' Never-worked', 7787: ' Never-worked', 7815: ' Federal-gov', 7826: ' Private', 7839: ' Federal-gov', 7862: ' State-gov', 7872: ' Local-gov', 7876: ' Private', 7900: ' State-gov', 7905: ' Local-gov', 7942: ' Private', 7963: ' Never-worked', 
7971: ' Private', 7977: ' State-gov', 7999: ' Never-worked', 8002: ' State-gov', 8007: ' Private', 8022: ' Private', 8042: ' Never-worked', 8053: ' Local-gov', 8057: ' Private', 8069: ' Private', 8085: ' Private', 8088: ' Private', 8096: ' Federal-gov', 8098: ' Never-worked', 8100: ' Never-worked', 8134: ' Never-worked', 8147: ' Never-worked', 8168: ' Never-worked', 8189: ' Never-worked', 8193: ' Private', 8222: ' Local-gov', 8241: ' Never-worked', 8297: ' Never-worked', 8322: ' Never-worked', 8364: ' Private', 8387: ' Never-worked', 8429: ' Never-worked', 8446: ' Local-gov', 8447: ' Self-emp-not-inc', 8472: ' Self-emp-inc', 8499: ' Private', 8532: ' Never-worked', 8543: ' Private', 8565: ' Never-worked', 8607: ' Never-worked', 8636: ' Federal-gov', 8643: ' Private', 8673: ' Never-worked', 8692: ' Federal-gov', 8694: ' Private', 8749: ' Never-worked', 8757: ' Never-worked', 8764: ' Private', 8769: ' Never-worked', 8782: ' Never-worked', 8788: ' Never-worked', 8795: ' Local-gov', 8805: ' Never-worked', 8822: ' Private', 8847: ' Private', 8853: ' Private', 8908: ' Never-worked', 8920: ' Never-worked', 8940: ' Private', 8949: ' Local-gov', 8954: ' Private', 8963: ' Private', 8985: ' Never-worked', 8991: ' Never-worked', 8996: ' Never-worked', 9028: ' Private', 9030: ' Local-gov', 9106: ' Private', 9138: ' Local-gov', 9140: ' Local-gov', 9141: ' Never-worked', 9147: ' Self-emp-not-inc', 9148: ' Never-worked', 9155: ' Never-worked', 9170: ' Never-worked', 9178: ' Never-worked', 9197: ' Private', 9211: ' Never-worked', 9214: ' Never-worked', 9245: ' Private', 9293: ' Never-worked', 9324: ' Private', 9340: ' Private', 9342: ' Private', 9351: ' Private', 9353: ' Never-worked', 9358: ' Never-worked', 9367: ' Never-worked', 9409: ' Never-worked', 9452: ' Never-worked', 9477: ' Never-worked', 9484: ' Federal-gov', 9490: ' Never-worked', 9500: ' Never-worked', 9531: ' Private', 9536: ' Federal-gov', 9549: ' Private', 9557: ' Local-gov', 9577: ' Private', 9582: ' Private', 
9616: ' Never-worked', 9626: ' State-gov', 9650: ' State-gov', 9703: ' Local-gov', 9708: ' Never-worked', 9712: ' State-gov', 9778: ' Private', 9787: ' Federal-gov', 9860: ' Never-worked', 9872: ' Federal-gov', 9885: ' Private', 9907: ' Private', 9926: ' Private', 9927: ' Never-worked', 9938: ' Local-gov', 9987: ' Self-emp-not-inc', 10013: ' Local-gov', 10015: ' Private', 10016: ' Never-worked', 10035: ' Private', 10056: ' Never-worked', 10064: ' Never-worked', 10094: ' Never-worked', 10098: ' Private', 10102: ' Private', 10110: ' Never-worked', 10117: ' Never-worked', 10126: ' Never-worked', 10139: ' Local-gov', 10143: ' Private', 10161: ' Never-worked', 10222: ' Never-worked', 10232: ' Never-worked', 10253: ' Never-worked', 10328: ' Local-gov', 10342: ' Private', 10361: ' Never-worked', 10411: ' Private', 10425: ' Federal-gov', 10437: ' Private', 10440: ' Never-worked', 10460: ' Local-gov', 10475: ' Never-worked', 10485: ' Never-worked', 10486: ' Never-worked', 10539: ' Never-worked', 10546: ' Private', 10570: ' Never-worked', 10581: ' Never-worked', 10673: ' Private', 10679: ' Never-worked', 10681: ' Private', 10683: ' Private', 10684: ' Private', 10700: ' Private', 10704: ' Never-worked', 10709: ' Local-gov', 10715: ' Private', 10718: ' Local-gov', 10745: ' Self-emp-inc', 10746: ' Never-worked', 10784: ' Never-worked', 10805: ' Never-worked', 10820: ' Private', 10828: ' Self-emp-not-inc', 10837: ' Private', 10856: ' Never-worked', 10881: ' Self-emp-inc', 10931: ' Private', 10932: ' Never-worked', 10955: ' Private', 10959: ' Never-worked', 10995: ' Never-worked', 11001: ' Never-worked', 11027: ' Private', 11039: ' Federal-gov', 11046: ' Never-worked', 11056: ' Private', 11059: ' Private', 11085: ' Private', 11087: ' Private', 11099: ' Federal-gov', 11158: ' Never-worked', 11159: ' Never-worked', 11164: ' Never-worked', 11192: ' Private', 11198: ' Self-emp-not-inc', 11217: ' Private', 11228: ' Private', 11234: ' Never-worked', 11261: ' Self-emp-not-inc', 11286: ' 
Self-emp-not-inc', 11294: ' Private', 11316: ' Private', 11328: ' Never-worked', 11334: ' Local-gov', 11340: ' State-gov', 11346: ' Local-gov', 11355: ' Local-gov', 11391: ' State-gov', 11413: ' Never-worked', 11421: ' Private', 11430: ' Private', 11460: ' Never-worked', 11474: ' Private', 11484: ' Private', 11500: ' Never-worked', 11516: ' Never-worked', 11526: ' Never-worked', 11532: ' Private', 11544: ' Self-emp-not-inc', 11550: ' Never-worked', 11573: ' Never-worked', 11578: ' Local-gov', 11580: ' Private', 11591: ' Never-worked', 11614: ' State-gov', 11621: ' Private', 11658: ' Never-worked', 11688: ' Private', 11691: ' Private', 11713: ' Never-worked', 11731: ' Self-emp-not-inc', 11732: ' Private', 11734: ' Local-gov', 11768: ' Never-worked', 11770: ' Private', 11773: ' Private', 11793: ' Private', 11851: ' Never-worked', 11864: ' Never-worked', 11938: ' Never-worked', 12007: ' Never-worked', 12008: ' Never-worked', 12020: ' Never-worked', 12029: ' Federal-gov', 12068: ' Never-worked', 12093: ' Never-worked', 12098: ' Private', 12130: ' Federal-gov', 12153: ' Private', 12161: ' Never-worked', 12175: ' Never-worked', 12198: ' Never-worked', 12213: ' Never-worked', 12214: ' Federal-gov', 12218: ' Never-worked', 12253: ' Never-worked', 12299: ' Private', 12326: ' Federal-gov', 12334: ' Private', 12351: ' Never-worked', 12373: ' Never-worked', 12377: ' Never-worked', 12405: ' Never-worked', 12411: ' Never-worked', 12428: ' Private', 12438: ' Private', 12451: ' Self-emp-not-inc', 12491: ' Never-worked', 12492: ' Self-emp-not-inc', 12543: ' State-gov', 12553: ' Local-gov', 12589: ' Private', 12609: ' Never-worked', 12627: ' Private', 12651: ' Never-worked', 12667: ' Never-worked', 12784: ' Never-worked', 12794: ' Never-worked', 12849: ' Never-worked', 12853: ' State-gov', 12858: ' Never-worked', 12908: ' Private', 12918: ' Private', 12919: ' Private', 12936: ' Private', 12981: ' Never-worked', 12991: ' Private', 12996: ' Private', 13025: ' Self-emp-not-inc', 13026: 
' Self-emp-not-inc', 13042: ' Federal-gov', 13043: ' Never-worked', 13065: ' Federal-gov', 13069: ' Never-worked', 13074: ' Private', 13095: ' Private', 13110: ' Never-worked', 13153: ' Never-worked', 13161: ' Federal-gov', 13177: ' Never-worked', 13181: ' Private', 13182: ' Private', 13203: ' Private', 13234: ' Federal-gov', 13290: ' Private', 13302: ' Private', 13321: ' Private', 13347: ' Local-gov', 13365: ' Never-worked', 13370: ' Self-emp-not-inc', 13384: ' Private', 13447: ' Never-worked', 13493: ' Federal-gov', 13498: ' State-gov', 13504: ' Without-pay', 13515: ' Private', 13525: ' Never-worked', 13534: ' Private', 13552: ' Never-worked', 13556: ' Never-worked', 13569: ' Never-worked', 13586: ' Never-worked', 13608: ' Private', 13626: ' Never-worked', 13646: ' Private', 13711: ' State-gov', 13744: ' Never-worked', 13784: ' Federal-gov', 13815: ' Never-worked', 13843: ' Private', 13845: ' State-gov', 13884: ' Private', 13933: ' Never-worked', 13949: ' Private', 14005: ' Private', 14053: ' Private', 14067: ' Never-worked', 14108: ' Never-worked', 14118: ' Private', 14123: ' Private', 14151: ' State-gov', 14178: ' Private', 14203: ' Never-worked', 14213: ' Private', 14217: ' Private', 14254: ' Never-worked', 14255: ' Private', 14280: ' Never-worked', 14330: ' Self-emp-not-inc', 14344: ' Never-worked', 14348: ' Private', 14360: ' Never-worked', 14363: ' Never-worked', 14370: ' Never-worked', 14398: ' Never-worked', 14418: ' Private', 14430: ' Never-worked', 14441: ' Never-worked', 14499: ' Never-worked', 14534: ' Never-worked', 14535: ' Never-worked', 14536: ' Private', 14541: ' Never-worked', 14548: ' Never-worked', 14571: ' Private', 14574: ' Local-gov', 14578: ' Never-worked', 14602: ' Private', 14618: ' Private', 14646: ' Never-worked', 14672: ' Federal-gov', 14688: ' Never-worked', 14692: ' Never-worked', 14717: ' Private', 14718: ' Federal-gov', 14725: ' Private', 14742: ' Never-worked', 14746: ' Never-worked', 14859: ' Private', 14860: ' Private', 14870: 
' Private', 14887: ' Private', 14911: ' Self-emp-not-inc', 14939: ' Never-worked', 14945: ' Never-worked', 14980: ' Private', 14982: ' Local-gov', 15012: ' Private', 15022: ' Never-worked', 15033: ' Never-worked', 15064: ' Never-worked', 15069: ' Never-worked', 15130: ' Local-gov', 15176: ' Private', 15192: ' Self-emp-not-inc', 15220: ' Private', 15238: ' Private', 15256: ' Private', 15266: ' Never-worked', 15286: ' Private', 15292: ' Never-worked', 15309: ' Private', 15310: ' Never-worked', 15350: ' Never-worked', 15414: ' Never-worked', 15424: ' Local-gov', 15426: ' State-gov', 15464: ' Never-worked', 15471: ' Private', 15476: ' Never-worked', 15484: ' Never-worked', 15499: ' Never-worked', 15523: ' Never-worked', 15532: ' Private', 15542: ' Never-worked', 15547: ' State-gov', 15579: ' Private', 15580: ' Private', 15584: ' Never-worked', 15596: ' Private', 15598: ' Private', 15616: ' Private', 15643: ' Private', 15674: ' State-gov', 15685: ' Never-worked', 15696: ' Never-worked', 15743: ' Never-worked', 15773: ' Private', 15778: ' Private', 15782: ' Never-worked', 15846: ' Local-gov', 15860: ' Local-gov', 15871: ' State-gov', 15911: ' Never-worked', 15960: ' Self-emp-not-inc', 15999: ' Never-worked', 16004: ' Federal-gov', 16019: ' Self-emp-inc', 16063: ' Private', 16065: ' Never-worked', 16082: ' Private', 16103: ' Private', 16117: ' Local-gov', 16123: ' Never-worked', 16130: ' Never-worked', 16136: ' Private', 16146: ' Never-worked', 16151: ' Private', 16155: ' Never-worked', 16173: ' Never-worked', 16179: ' Never-worked', 16185: ' Never-worked', 16196: ' Private', 16212: ' Never-worked', 16221: ' Never-worked', 16292: ' Federal-gov', 16294: ' Federal-gov', 16346: ' Never-worked', 16379: ' Never-worked', 16382: ' Never-worked', 16399: ' Never-worked', 16404: ' Federal-gov', 16410: ' Never-worked', 16454: ' Private', 16456: ' Local-gov', 16488: ' Never-worked', 16490: ' Federal-gov', 16515: ' Never-worked', 16523: ' Federal-gov', 16535: ' Never-worked', 16566: ' 
Never-worked', 16583: ' Never-worked', 16595: ' Never-worked', 16602: ' Private', 16642: ' Never-worked', 16659: ' Never-worked', 16679: ' Never-worked', 16725: ' Never-worked', 16731: ' Never-worked', 16743: ' Never-worked', 16748: ' Private', 16750: ' Self-emp-not-inc', 16755: ' Private', 16760: ' Never-worked', 16762: ' Local-gov', 16794: ' Never-worked', 16798: ' Private', 16802: ' Never-worked', 16810: ' Self-emp-not-inc', 16817: ' Self-emp-not-inc', 16827: ' Never-worked', 16835: ' Private', 16838: ' Private', 16878: ' Never-worked', 16907: ' Never-worked', 16966: ' Private', 16978: ' Private', 16984: ' Local-gov', 17015: ' Private', 17030: ' Never-worked', 17039: ' Never-worked', 17096: ' Never-worked', 17098: ' Private', 17132: ' Private', 17168: ' Federal-gov', 17209: ' Federal-gov', 17247: ' Never-worked', 17279: ' Private', 17299: ' Self-emp-not-inc', 17314: ' Private', 17321: ' Never-worked', 17326: ' Self-emp-inc', 17347: ' Local-gov', 17391: ' Never-worked', 17412: ' Never-worked', 17414: ' Local-gov', 17462: ' Private', 17470: ' Never-worked', 17505: ' Local-gov', 17531: ' Never-worked', 17537: ' Never-worked', 17587: ' Self-emp-not-inc', 17594: ' Private', 17635: ' Private', 17643: ' Never-worked', 17644: ' State-gov', 17648: ' Private', 17708: ' Private', 17710: ' Federal-gov', 17717: ' Never-worked', 17723: ' Never-worked', 17725: ' Private', 17750: ' Never-worked', 17752: ' Never-worked', 17757: ' Private', 17762: ' Never-worked', 17773: ' Private', 17790: ' Self-emp-not-inc', 17811: ' Never-worked', 17837: ' Private', 17876: ' Private', 17882: ' State-gov', 17902: ' Private', 17905: ' Private', 18008: ' Private', 18010: ' Private', 18019: ' Never-worked', 18036: ' Private', 18056: ' Never-worked', 18094: ' Never-worked', 18161: ' Without-pay', 18164: ' Self-emp-inc', 18181: ' Private', 18194: ' Local-gov', 18201: ' Private', 18217: ' Private', 18218: ' Private', 18232: ' Private', 18236: ' Private', 18244: ' Never-worked', 18257: ' 
Self-emp-inc', 18259: ' Never-worked', 18294: ' Never-worked', 18322: ' Never-worked', 18331: ' Private', 18337: ' Never-worked', 18342: ' Private', 18356: ' Never-worked', 18358: ' Private', 18362: ' Never-worked', 18384: ' Private', 18386: ' Private', 18409: ' Private', 18466: ' State-gov', 18470: ' Never-worked', 18496: ' Private', 18534: ' Private', 18541: ' Private', 18560: ' Local-gov', 18564: ' Local-gov', 18577: ' Private', 18599: ' Local-gov', 18600: ' Never-worked', 18604: ' Never-worked', 18615: ' Private', 18622: ' Private', 18655: ' Never-worked', 18720: ' Federal-gov', 18730: ' State-gov', 18750: ' Never-worked', 18753: ' Never-worked', 18794: ' Local-gov', 18805: ' Self-emp-not-inc', 18846: ' Never-worked', 18912: ' Never-worked', 18923: ' Private', 18924: ' Never-worked', 18931: ' Private', 18934: ' Private', 18942: ' Never-worked', 18952: ' Self-emp-not-inc', 18964: ' Never-worked', 18989: ' Private', 18992: ' Never-worked', 19041: ' Private', 19058: ' Private', 19073: ' Private', 19090: ' Never-worked', 19133: ' Private', 19134: ' Never-worked', 19153: ' State-gov', 19168: ' Private', 19180: ' Self-emp-not-inc', 19230: ' Private', 19233: ' Never-worked', 19240: ' Private', 19253: ' Never-worked', 19255: ' Never-worked', 19284: ' Private', 19311: ' Never-worked', 19318: ' Never-worked', 19320: ' Private', 19337: ' Private', 19345: ' Self-emp-inc', 19433: ' Private', 19438: ' Federal-gov', 19455: ' Never-worked', 19461: ' Never-worked', 19462: ' Private', 19492: ' Self-emp-inc', 19509: ' State-gov', 19544: ' Local-gov', 19546: ' Private', 19548: ' Private', 19561: ' Private', 19616: ' Federal-gov', 19620: ' Private', 19657: ' Federal-gov', 19706: ' Never-worked', 19708: ' Federal-gov', 19764: ' Never-worked', 19775: ' Never-worked', 19786: ' Private', 19788: ' Never-worked', 19812: ' Private', 19814: ' Private', 19819: ' Private', 19820: ' Never-worked', 19830: ' Private', 19842: ' Federal-gov', 19857: ' Never-worked', 19889: ' Private', 19896: ' 
Private', 19986: ' Never-worked', 20003: ' Private', 20007: ' Never-worked', 20009: ' Federal-gov', 20017: ' Local-gov', 20023: ' Private', 20029: ' Never-worked', 20031: ' Self-emp-not-inc', 20038: ' Private', 20064: ' Never-worked', 20068: ' Never-worked', 20072: ' Private', 20094: ' Never-worked', 20099: ' Never-worked', 20105: ' Federal-gov', 20159: ' Private', 20161: ' Never-worked', 20187: ' Never-worked', 20191: ' Private', 20205: ' Private', 20227: ' Private', 20266: ' Never-worked', 20269: ' Federal-gov', 20271: ' Private', 20291: ' Never-worked', 20302: ' Never-worked', 20314: ' Never-worked', 20321: ' State-gov', 20333: ' Self-emp-inc', 20396: ' Private', 20399: ' Federal-gov', 20434: ' Private', 20436: ' Self-emp-not-inc', 20474: ' Private', 20480: ' Federal-gov', 20527: ' Private', 20530: ' Never-worked', 20544: ' Never-worked', 20551: ' Never-worked', 20563: ' Private', 20576: ' Private', 20595: ' Local-gov', 20608: ' Self-emp-not-inc', 20612: ' Never-worked', 20646: ' Never-worked', 20656: ' State-gov', 20664: ' Private', 20686: ' Private', 20724: ' Private', 20757: ' Never-worked', 20773: ' Never-worked', 20775: ' Private', 20776: ' Never-worked', 20782: ' Never-worked', 20797: ' Private', 20803: ' Private', 20825: ' Private', 20826: ' Self-emp-not-inc', 20829: ' Self-emp-inc', 20868: ' Local-gov', 20877: ' Local-gov', 20880: ' Local-gov', 20915: ' Local-gov', 20930: ' Never-worked', 20938: ' Never-worked', 20941: ' Never-worked', 20953: ' Local-gov', 20963: ' Never-worked', 21039: ' Never-worked', 21096: ' Never-worked', 21114: ' Never-worked', 21125: ' Never-worked', 21142: ' State-gov', 21146: ' Federal-gov', 21152: ' Local-gov', 21158: ' Never-worked', 21176: ' Private', 21179: ' Never-worked', 21242: ' Never-worked', 21243: ' Private', 21272: ' Never-worked', 21274: ' Private', 21289: ' Private', 21348: ' Never-worked', 21356: ' Private', 21394: ' Never-worked', 21410: ' Private', 21413: ' Private', 21422: ' Federal-gov', 21428: ' 
Never-worked', 21437: ' Never-worked', 21453: ' Private', 21455: ' Never-worked', 21465: ' Never-worked', 21483: ' Never-worked', 21487: ' Private', 21491: ' Private', 21516: ' Local-gov', 21524: ' Private', 21528: ' Never-worked', 21537: ' Private', 21545: ' Never-worked', 21549: ' Never-worked', 21586: ' Never-worked', 21626: ' Private', 21631: ' Private', 21648: ' Self-emp-not-inc', 21666: ' Never-worked', 21686: ' Private', 21698: ' Private', 21725: ' Self-emp-inc', 21746: ' Private', 21762: ' Private', 21799: ' Private', 21803: ' Never-worked', 21805: ' Private', 21812: ' Private', 21829: ' Never-worked', 21847: ' Never-worked', 21851: ' Never-worked', 21857: ' Never-worked', 21861: ' State-gov', 21892: ' Private', 21914: ' Never-worked', 21919: ' Never-worked', 21946: ' Never-worked', 22006: ' Private', 22022: ' Never-worked', 22033: ' Private', 22042: ' Private', 22060: ' State-gov', 22075: ' Never-worked', 22094: ' Never-worked', 22101: ' Never-worked', 22109: ' Private', 22130: ' Private', 22165: ' Never-worked', 22173: ' Never-worked', 22225: ' Never-worked', 22254: ' Private', 22281: ' Never-worked', 22348: ' Private', 22350: ' Never-worked', 22373: ' Private', 22378: ' Private', 22379: ' Never-worked', 22387: ' Never-worked', 22396: ' Never-worked', 22405: ' Never-worked', 22422: ' Self-emp-not-inc', 22495: ' Private', 22502: ' Never-worked', 22510: ' Never-worked', 22545: ' Private', 22556: ' Private', 22633: ' Never-worked', 22644: ' Private', 22731: ' Private', 22751: ' Never-worked', 22757: ' Never-worked', 22786: ' Never-worked', 22795: ' Never-worked', 22798: ' State-gov', 22820: ' Federal-gov', 22833: ' Private', 22841: ' Never-worked', 22848: ' Never-worked', 22885: ' Never-worked', 22898: ' Local-gov', 22910: ' Never-worked', 22928: ' Never-worked', 22951: ' Private', 22976: ' Private', 22983: ' Federal-gov', 22997: ' Never-worked', 23009: ' Federal-gov', 23019: ' Private', 23062: ' Never-worked', 23099: ' Never-worked', 23109: ' Never-worked', 
23123: ' Private', 23136: ' Never-worked', 23176: ' Never-worked', 23192: ' Never-worked', 23209: ' Private', 23228: ' State-gov', 23237: ' Private', 23267: ' Never-worked', 23281: ' Never-worked', 23315: ' Never-worked', 23330: ' Never-worked', 23333: ' Never-worked', 23336: ' Private', 23342: ' Never-worked', 23351: ' Never-worked', 23353: ' Never-worked', 23373: ' Never-worked', 23375: ' Private', 23388: ' Private', 23395: ' Never-worked', 23415: ' Private', 23421: ' Private', 23502: ' Private', 23535: ' Private', 23536: ' Private', 23538: ' Private', 23544: ' Never-worked', 23551: ' Never-worked', 23592: ' Private', 23636: ' Private', 23670: ' Never-worked', 23711: ' Private', 23729: ' Private', 23744: ' Private', 23755: ' Private', 23759: ' Self-emp-inc', 23793: ' Federal-gov', 23818: ' Local-gov', 23822: ' Never-worked', 23825: ' Local-gov', 23853: ' Federal-gov', 23865: ' Private', 23880: ' Private', 23915: ' Never-worked', 23918: ' Private', 23920: ' Never-worked', 23936: ' Never-worked', 23954: ' Self-emp-not-inc', 23980: ' Local-gov', 24003: ' Private', 24015: ' Private', 24047: ' Private', 24053: ' Private', 24054: ' Private', 24073: ' Private', 24084: ' Federal-gov', 24109: ' State-gov', 24133: ' Never-worked', 24149: ' Never-worked', 24155: ' Federal-gov', 24183: ' State-gov', 24195: ' Never-worked', 24232: ' Private', 24238: ' Private', 24241: ' Never-worked', 24247: ' Private', 24299: ' Private', 24308: ' Never-worked', 24318: ' Federal-gov', 24379: ' Private', 24386: ' Private', 24405: ' Federal-gov', 24429: ' Private', 24461: ' Private', 24475: ' Private', 24485: ' Local-gov', 24527: ' Private', 24532: ' Private', 24579: ' Private', 24580: ' Never-worked', 24636: ' Never-worked', 24644: ' Federal-gov', 24686: ' Never-worked', 24694: ' Never-worked', 24704: ' Private', 24714: ' Never-worked', 24735: ' Private', 24752: ' Never-worked', 24760: ' Never-worked', 24773: ' Never-worked', 24779: ' Private', 24781: ' Never-worked', 24787: ' Private', 24796: 
' Never-worked', 24809: ' Never-worked', 24813: ' Private', 24821: ' Never-worked', 24828: ' Private', 24875: ' Private', 24876: ' Private', 24894: ' Local-gov', 24902: ' Private', 24913: ' Never-worked', 24949: ' Never-worked', 24998: ' Never-worked', 25009: ' Never-worked', 25034: ' Never-worked', 25044: ' Never-worked', 25053: ' Without-pay', 25056: ' Never-worked', 25060: ' Never-worked', 25067: ' Never-worked', 25076: ' Never-worked', 25099: ' Never-worked', 25122: ' Private', 25134: ' Never-worked', 25163: ' Federal-gov', 25186: ' Federal-gov', 25208: ' Never-worked', 25216: ' Never-worked', 25225: ' Private', 25238: ' Federal-gov', 25266: ' Private', 25270: ' Private', 25295: ' Private', 25303: ' Local-gov', 25321: ' Private', 25359: ' Private', 25397: ' Local-gov', 25417: ' Never-worked', 25424: ' Self-emp-inc', 25430: ' Never-worked', 25441: ' Private', 25457: ' Private', 25519: ' Never-worked', 25522: ' Local-gov', 25524: ' Private', 25537: ' Self-emp-not-inc', 25568: ' Private', 25578: ' Private', 25588: ' Never-worked', 25636: ' Never-worked', 25669: ' Never-worked', 25685: ' Private', 25703: ' Never-worked', 25726: ' State-gov', 25748: ' Never-worked', 25755: ' Never-worked', 25772: ' Private', 25802: ' Private', 25806: ' Private', 25813: ' Private', 25819: ' Federal-gov', 25826: ' Never-worked', 25835: ' Never-worked', 25846: ' Private', 25853: ' Private', 25877: ' Private', 25902: ' Private', 25928: ' Private', 25929: ' State-gov', 25930: ' Private', 25976: ' Private', 25980: ' Private', 26012: ' Private', 26040: ' Never-worked', 26051: ' Never-worked', 26081: ' Private', 26093: ' Self-emp-not-inc', 26119: ' Never-worked', 26127: ' Never-worked', 26137: ' Never-worked', 26143: ' Federal-gov', 26145: ' Never-worked', 26163: ' Private', 26225: ' Never-worked', 26244: ' Never-worked', 26259: ' Never-worked', 26287: ' Private', 26295: ' Never-worked', 26296: ' Federal-gov', 26312: ' Private', 26331: ' Never-worked', 26348: ' Private', 26363: ' Private', 
26373: ' Local-gov', 26375: ' Local-gov', 26410: ' State-gov', 26416: ' Never-worked', 26421: ' Never-worked', 26450: ' Never-worked', 26489: ' Private', 26503: ' Self-emp-not-inc', 26512: ' Local-gov', 26513: ' Private', 26522: ' Private', 26549: ' State-gov', 26566: ' Federal-gov', 26586: ' Private', 26598: ' Never-worked', 26599: ' Never-worked', 26681: ' Private', 26687: ' State-gov', 26688: ' Never-worked', 26776: ' Never-worked', 26791: ' Never-worked', 26838: ' State-gov', 26866: ' Never-worked', 26924: ' Private', 26928: ' Private', 26958: ' Private', 26982: ' Federal-gov', 26986: ' Never-worked', 26992: ' Never-worked', 26998: ' Never-worked', 27009: ' Never-worked', 27018: ' Federal-gov', 27022: ' State-gov', 27038: ' Never-worked', 27051: ' Never-worked', 27085: ' Never-worked', 27086: ' Never-worked', 27100: ' Private', 27107: ' Private', 27140: ' Private', 27144: ' Private', 27179: ' Never-worked', 27201: ' Never-worked', 27225: ' Private', 27265: ' Never-worked', 27267: ' Private', 27317: ' Private', 27321: ' Never-worked', 27331: ' Private', 27350: ' Never-worked', 27382: ' Never-worked', 27395: ' Never-worked', 27426: ' Never-worked', 27452: ' Never-worked', 27466: ' Private', 27475: ' Federal-gov', 27498: ' Never-worked', 27507: ' Private', 27513: ' Private', 27518: ' Private', 27522: ' Never-worked', 27549: ' Never-worked', 27563: ' Never-worked', 27569: ' Private', 27578: ' State-gov', 27579: ' Private', 27590: ' Federal-gov', 27595: ' State-gov', 27613: ' Private', 27642: ' Self-emp-not-inc', 27657: ' Private', 27665: ' Federal-gov', 27676: ' Federal-gov', 27698: ' State-gov', 27702: ' Local-gov', 27707: ' Never-worked', 27723: ' Private', 27730: ' Never-worked', 27770: ' Private', 27774: ' Never-worked', 27808: ' Never-worked', 27844: ' Private', 27853: ' Private', 27898: ' Private', 27915: ' Private', 27935: ' Private', 27944: ' Private', 27971: ' Private', 27993: ' Private', 28009: ' Private', 28020: ' Local-gov', 28037: ' Never-worked', 
28043: ' Federal-gov', 28112: ' Private', 28138: ' Never-worked', 28143: ' Private', 28144: ' Never-worked', 28145: ' Local-gov', 28148: ' Private', 28164: ' Private', 28175: ' Never-worked', 28194: ' Never-worked', 28206: ' Private', 28210: ' Private', 28241: ' Never-worked', 28271: ' Private', 28276: ' Never-worked', 28293: ' Private', 28303: ' Never-worked', 28320: ' Never-worked', 28344: ' Private', 28365: ' Private', 28382: ' Never-worked', 28384: ' Never-worked', 28394: ' Never-worked', 28425: ' Federal-gov', 28476: ' Local-gov', 28478: ' Private', 28503: ' Never-worked', 28543: ' Private', 28548: ' Private', 28586: ' Never-worked', 28603: ' Self-emp-inc', 28629: ' Private', 28686: ' Private', 28688: ' Never-worked', 28715: ' Private', 28773: ' Never-worked', 28783: ' Local-gov', 28802: ' Never-worked', 28811: ' Never-worked', 28849: ' Private', 28855: ' Never-worked', 28859: ' Never-worked', 28885: ' Local-gov', 28891: ' Never-worked', 28904: ' State-gov', 28918: ' Private', 28922: ' Private', 28933: ' Never-worked', 28943: ' Never-worked', 28953: ' Self-emp-not-inc', 28957: ' Never-worked', 28961: ' Federal-gov', 29026: ' Never-worked', 29040: ' Private', 29061: ' Private', 29062: ' Private', 29072: ' Never-worked', 29094: ' Never-worked', 29097: ' Private', 29101: ' Never-worked', 29119: ' Local-gov', 29128: ' Never-worked', 29177: ' Private', 29206: ' State-gov', 29222: ' Local-gov', 29259: ' Never-worked', 29297: ' Never-worked', 29310: ' Never-worked', 29318: ' Never-worked', 29324: ' Local-gov', 29325: ' Never-worked', 29340: ' Federal-gov', 29359: ' Never-worked', 29360: ' State-gov', 29375: ' Never-worked', 29391: ' Never-worked', 29397: ' Private', 29421: ' Private', 29436: ' Never-worked', 29447: ' Private', 29455: ' Local-gov', 29465: ' Never-worked', 29480: ' Never-worked', 29527: ' Private', 29582: ' Never-worked', 29586: ' Never-worked', 29601: ' Private', 29614: ' Never-worked', 29662: ' Private', 29693: ' Never-worked', 29698: ' Private', 
29710: ' Private', 29724: ' Private', 29737: ' Private', 29752: ' Self-emp-not-inc', 29791: ' Federal-gov', 29798: ' Never-worked', 29807: ' Never-worked', 29814: ' Private', 29819: ' Private', 29835: ' Never-worked', 29842: ' Local-gov', 29881: ' Federal-gov', 29902: ' State-gov', 29956: ' Private', 29967: ' Never-worked', 30042: ' Federal-gov', 30059: ' Never-worked', 30061: ' Private', 30068: ' Never-worked', 30102: ' Never-worked', 30156: ' State-gov', 30158: ' Private', 30163: ' Never-worked', 30206: ' Never-worked', 30208: ' Never-worked', 30209: ' Never-worked', 30218: ' Private', 30268: ' Private', 30277: ' Never-worked', 30295: ' Federal-gov', 30313: ' Never-worked', 30334: ' Never-worked', 30369: ' Never-worked', 30384: ' Never-worked', 30397: ' Federal-gov', 30402: ' Never-worked', 30412: ' Private', 30444: ' Never-worked', 30450: ' Private', 30456: ' Never-worked', 30468: ' Private', 30513: ' Never-worked', 30558: ' Private', 30569: ' Federal-gov', 30584: ' Never-worked', 30615: ' Never-worked', 30623: ' Local-gov', 30624: ' Private', 30628: ' State-gov', 30641: ' Never-worked', 30660: ' Never-worked', 30673: ' Never-worked', 30677: ' Never-worked', 30687: ' Private', 30693: ' Private', 30698: ' Never-worked', 30707: ' Self-emp-inc', 30711: ' Never-worked', 30713: ' Private', 30723: ' Federal-gov', 30727: ' Never-worked', 30744: ' Private', 30757: ' Local-gov', 30776: ' Never-worked', 30781: ' Federal-gov', 30784: ' Self-emp-not-inc', 30822: ' Never-worked', 30827: ' Federal-gov', 30830: ' Private', 30897: ' Never-worked', 30930: ' Never-worked', 30947: ' Private', 30977: ' Private', 31018: ' Never-worked', 31032: ' Never-worked', 31036: ' Never-worked', 31043: ' Private', 31061: ' Never-worked', 31062: ' Private', 31073: ' Never-worked', 31094: ' Never-worked', 31101: ' Never-worked', 31108: ' Never-worked', 31118: ' Private', 31123: ' Never-worked', 31178: ' Private', 31193: ' Never-worked', 31201: ' Never-worked', 31220: ' Private', 31235: ' 
Never-worked', 31247: ' Private', 31253: ' Private', 31273: ' Federal-gov', 31276: ' Private', 31279: ' Never-worked', 31307: ' Local-gov', 31313: ' Private', 31360: ' Private', 31372: ' Private', 31390: ' Private', 31421: ' Private', 31432: ' Self-emp-not-inc', 31527: ' Never-worked', 31536: ' Never-worked', 31540: ' Federal-gov', 31568: ' Never-worked', 31577: ' Private', 31591: ' Federal-gov', 31594: ' Private', 31605: ' Never-worked', 31621: ' Local-gov', 31633: ' Never-worked', 31635: ' Private', 31661: ' Self-emp-inc', 31664: ' Private', 31668: ' Private', 31696: ' Federal-gov', 31698: ' Never-worked', 31710: ' Private', 31711: ' Private', 31723: ' Private', 31731: ' Private', 31739: ' Private', 31751: ' Never-worked', 31753: ' Never-worked', 31758: ' Never-worked', 31765: ' Never-worked', 31772: ' Never-worked', 31775: ' Private', 31790: ' Never-worked', 31792: ' Never-worked', 31793: ' Private', 31810: ' Never-worked', 31821: ' Never-worked', 31836: ' Local-gov', 31862: ' Never-worked', 31871: ' Never-worked', 31885: ' State-gov', 31908: ' Private', 31912: ' Private', 31913: ' Never-worked', 31997: ' Federal-gov', 31998: ' Private', 32008: ' Private', 32016: ' Private', 32039: ' Never-worked', 32062: ' Never-worked', 32070: ' Never-worked', 32073: ' Private', 32075: ' Never-worked', 32080: ' Never-worked', 32083: ' Private', 32088: ' Never-worked', 32093: ' Private', 32103: ' Federal-gov', 32189: ' Never-worked', 32201: ' Private', 32206: ' Private', 32275: ' Never-worked', 32291: ' Federal-gov', 32310: ' Never-worked', 32317: ' Private', 32335: ' Never-worked', 32343: ' Private', 32426: ' Never-worked', 32477: ' State-gov', 32490: ' Private', 32494: ' Self-emp-not-inc', 32525: ' Self-emp-not-inc', 32530: ' Never-worked', 32531: ' Local-gov', 32539: ' Private', 32541: ' Private', 32542: ' Private'}\n"
     ]
    }
   ],
   "source": [
    "# Pair each key from index_1 with the value at the same position in li_2,\n",
    "# store the pairs in dic, then print the resulting mapping.\n",
    "dic.update(zip(index_1, li_2))\n",
    "print(dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "c6306fb6-a810-4dca-82b9-86c3da0bb863",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Next, use the dictionary to replace the values in the original data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "00caf43f-1aab-4943-9c88-e98c0a80c6e5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-190-d769568fbe18>:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df['workclass'][i] = k\n"
     ]
    }
   ],
   "source": [
    "# Write every replacement from dic (row label -> new workclass value) back into df\n",
    "# with a single label-based .loc assignment.\n",
    "# The previous chained form, df['workclass'][i] = k, raises SettingWithCopyWarning\n",
    "# because it may assign into a temporary copy instead of df itself.\n",
    "df.loc[list(dic.keys()), 'workclass'] = list(dic.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "0b61b0fc-b018-4282-84d9-40168ef04fab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',\n",
       "       ' Local-gov', ' Self-emp-inc', ' Never-worked', ' Without-pay'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['workclass'].unique()  # check: no '?' entries remain in this column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "fbe1310d-3979-4afb-982e-deb08cd84fc0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>83311.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>215646.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>234721.0</td>\n",
       "      <td>11th</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>338409.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>Cuba</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32556</th>\n",
       "      <td>27.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>257302.0</td>\n",
       "      <td>Assoc-acdm</td>\n",
       "      <td>12.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Tech-support</td>\n",
       "      <td>Wife</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32557</th>\n",
       "      <td>40.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>154374.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Machine-op-inspct</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32558</th>\n",
       "      <td>58.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>151910.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Widowed</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32559</th>\n",
       "      <td>22.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>201490.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Own-child</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32560</th>\n",
       "      <td>52.0</td>\n",
       "      <td>Self-emp-inc</td>\n",
       "      <td>287927.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Wife</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>32561 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age          workclass    fnlwgt    education  education-num  \\\n",
       "0      39.0          State-gov   77516.0    Bachelors           13.0   \n",
       "1      50.0   Self-emp-not-inc   83311.0    Bachelors           13.0   \n",
       "2      38.0            Private  215646.0      HS-grad            9.0   \n",
       "3      53.0            Private  234721.0         11th            7.0   \n",
       "4      28.0            Private  338409.0    Bachelors           13.0   \n",
       "...     ...                ...       ...          ...            ...   \n",
       "32556  27.0            Private  257302.0   Assoc-acdm           12.0   \n",
       "32557  40.0            Private  154374.0      HS-grad            9.0   \n",
       "32558  58.0            Private  151910.0      HS-grad            9.0   \n",
       "32559  22.0            Private  201490.0      HS-grad            9.0   \n",
       "32560  52.0       Self-emp-inc  287927.0      HS-grad            9.0   \n",
       "\n",
       "            marital-status          occupation    relationship    race  \\\n",
       "0            Never-married        Adm-clerical   Not-in-family   White   \n",
       "1       Married-civ-spouse     Exec-managerial         Husband   White   \n",
       "2                 Divorced   Handlers-cleaners   Not-in-family   White   \n",
       "3       Married-civ-spouse   Handlers-cleaners         Husband   Black   \n",
       "4       Married-civ-spouse      Prof-specialty            Wife   Black   \n",
       "...                    ...                 ...             ...     ...   \n",
       "32556   Married-civ-spouse        Tech-support            Wife   White   \n",
       "32557   Married-civ-spouse   Machine-op-inspct         Husband   White   \n",
       "32558              Widowed        Adm-clerical       Unmarried   White   \n",
       "32559        Never-married        Adm-clerical       Own-child   White   \n",
       "32560   Married-civ-spouse     Exec-managerial            Wife   White   \n",
       "\n",
       "           sex  capital-gain  capital-loss  hours-per-week  native-country  \\\n",
       "0         Male        2174.0           0.0            40.0   United-States   \n",
       "1         Male           0.0           0.0            13.0   United-States   \n",
       "2         Male           0.0           0.0            40.0   United-States   \n",
       "3         Male           0.0           0.0            40.0   United-States   \n",
       "4       Female           0.0           0.0            40.0            Cuba   \n",
       "...        ...           ...           ...             ...             ...   \n",
       "32556   Female           0.0           0.0            38.0   United-States   \n",
       "32557     Male           0.0           0.0            40.0   United-States   \n",
       "32558   Female           0.0           0.0            40.0   United-States   \n",
       "32559     Male           0.0           0.0            20.0   United-States   \n",
       "32560   Female       15024.0           0.0            40.0   United-States   \n",
       "\n",
       "      Listing of attributes  \n",
       "0                     <=50K  \n",
       "1                     <=50K  \n",
       "2                     <=50K  \n",
       "3                     <=50K  \n",
       "4                     <=50K  \n",
       "...                     ...  \n",
       "32556                 <=50K  \n",
       "32557                  >50K  \n",
       "32558                 <=50K  \n",
       "32559                 <=50K  \n",
       "32560                  >50K  \n",
       "\n",
       "[32561 rows x 15 columns]"
      ]
     },
     "execution_count": 193,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the DataFrame (pandas truncates the repr to the first and last rows)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "6192f6fc-57fb-44dd-8317-e26b96535b13",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39. 50. 38. 53. 28. 37. 49. 52. 31. 42. 30. 23. 32. 40. 34. 25. 43. 54.\n",
      " 35. 59. 56. 19. 20. 45. 22. 48. 21. 24. 57. 44. 41. 29. 18. 47. 46. 36.\n",
      " 79. 27. 67. 33. 76. 17. 55. 61. 70. 64. 71. 68. 66. 51. 58. 26. 60. 90.\n",
      " 75. 65. 77. 62. 63. 80. 72. 74. 69. 73. 81. 78. 88. 82. 83. 84. 85. 86.\n",
      " 87.]\n",
      "[' State-gov' ' Self-emp-not-inc' ' Private' ' Federal-gov' ' Local-gov'\n",
      " ' Self-emp-inc' ' Never-worked' ' Without-pay']\n",
      "[ 77516.  83311. 215646. ...  34066.  84661. 257302.]\n",
      "[' Bachelors' ' HS-grad' ' 11th' ' Masters' ' 9th' ' Some-college'\n",
      " ' Assoc-acdm' ' Assoc-voc' ' 7th-8th' ' Doctorate' ' Prof-school'\n",
      " ' 5th-6th' ' 10th' ' 1st-4th' ' Preschool' ' 12th']\n",
      "[13.  9.  7. 14.  5. 10. 12. 11.  4. 16. 15.  3.  6.  2.  1.  8.]\n",
      "[' Never-married' ' Married-civ-spouse' ' Divorced'\n",
      " ' Married-spouse-absent' ' Separated' ' Married-AF-spouse' ' Widowed']\n",
      "[' Adm-clerical' ' Exec-managerial' ' Handlers-cleaners' ' Prof-specialty'\n",
      " ' Other-service' ' Sales' ' Craft-repair' ' Transport-moving'\n",
      " ' Farming-fishing' ' Machine-op-inspct' ' Tech-support' ' ?'\n",
      " ' Protective-serv' ' Armed-Forces' ' Priv-house-serv']\n",
      "[' Not-in-family' ' Husband' ' Wife' ' Own-child' ' Unmarried'\n",
      " ' Other-relative']\n",
      "[' White' ' Black' ' Asian-Pac-Islander' ' Amer-Indian-Eskimo' ' Other']\n",
      "[' Male' ' Female']\n",
      "[ 2174.     0. 14084.  5178.  5013.  2407. 14344. 15024.  7688. 34095.\n",
      "  4064.  4386.  7298.  1409.  3674.  1055.  3464.  2050.  2176.   594.\n",
      " 20051.  6849.  4101.  1111.  8614.  3411.  2597. 25236.  4650.  9386.\n",
      "  2463.  3103. 10605.  2964.  3325.  2580.  3471.  4865. 99999.  6514.\n",
      "  1471.  2329.  2105.  2885. 25124. 10520.  2202.  2961. 27828.  6767.\n",
      "  2228.  1506. 13550.  2635.  5556.  4787.  3781.  3137.  3818.  3942.\n",
      "   914.   401.  2829.  2977.  4934.  2062.  2354.  5455. 15020.  1424.\n",
      "  3273. 22040.  4416.  3908. 10566.   991.  4931.  1086.  7430.  6497.\n",
      "   114.  7896.  2346.  3418.  3432.  2907.  1151.  2414.  2290. 15831.\n",
      " 41310.  4508.  2538.  3456.  6418.  1848.  3887.  5721.  9562.  1455.\n",
      "  2036.  1831. 11678.  2936.  2993.  7443.  6360.  1797.  1173.  4687.\n",
      "  6723.  2009.  6097.  2653.  1639. 18481.  7978.  2387.  5060.]\n",
      "[   0. 2042. 1408. 1902. 1573. 1887. 1719. 1762. 1564. 2179. 1816. 1980.\n",
      " 1977. 1876. 1340. 2206. 1741. 1485. 2339. 2415. 1380. 1721. 2051. 2377.\n",
      " 1669. 2352. 1672.  653. 2392. 1504. 2001. 1590. 1651. 1628. 1848. 1740.\n",
      " 2002. 1579. 2258. 1602.  419. 2547. 2174. 2205. 1726. 2444. 1138. 2238.\n",
      "  625.  213. 1539.  880. 1668. 1092. 1594. 3004. 2231. 1844.  810. 2824.\n",
      " 2559. 2057. 1974.  974. 2149. 1825. 1735. 1258. 2129. 2603. 2282.  323.\n",
      " 4356. 2246. 1617. 1648. 2489. 3770. 1755. 3683. 2267. 2080. 2457.  155.\n",
      " 3900. 2201. 1944. 2467. 2163. 2754. 2472. 1411.]\n",
      "[40. 13. 16. 45. 50. 80. 30. 35. 60. 20. 52. 44. 15. 25. 38. 43. 55. 48.\n",
      " 58. 32. 70.  2. 22. 56. 41. 28. 36. 24. 46. 42. 12. 65.  1. 10. 34. 75.\n",
      " 98. 33. 54.  8.  6. 64. 19. 18. 72.  5.  9. 47. 37. 21. 26. 14.  4. 59.\n",
      "  7. 99. 53. 39. 62. 57. 78. 90. 66. 11. 49. 84.  3. 17. 68. 27. 85. 31.\n",
      " 51. 77. 63. 23. 87. 88. 73. 89. 97. 94. 29. 96. 67. 82. 86. 91. 81. 76.\n",
      " 92. 61. 74. 95.]\n",
      "[' United-States' ' Cuba' ' Jamaica' ' India' ' ?' ' Mexico' ' South'\n",
      " ' Puerto-Rico' ' Honduras' ' England' ' Canada' ' Germany' ' Iran'\n",
      " ' Philippines' ' Italy' ' Poland' ' Columbia' ' Cambodia' ' Thailand'\n",
      " ' Ecuador' ' Laos' ' Taiwan' ' Haiti' ' Portugal' ' Dominican-Republic'\n",
      " ' El-Salvador' ' France' ' Guatemala' ' China' ' Japan' ' Yugoslavia'\n",
      " ' Peru' ' Outlying-US(Guam-USVI-etc)' ' Scotland' ' Trinadad&Tobago'\n",
      " ' Greece' ' Nicaragua' ' Vietnam' ' Hong' ' Ireland' ' Hungary'\n",
      " ' Holand-Netherlands']\n",
      "[' <=50K' ' >50K']\n"
     ]
    }
   ],
   "source": [
    "# Print the distinct values of every column, one array per column.\n",
    "for column_name in df.columns:\n",
    "    print(df[column_name].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01dfd071-c014-4aa1-9b16-4bdefbafe582",
   "metadata": {},
   "source": [
    "# 填补occupation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "id": "18054771-8df7-4a21-9ba4-751752f02fbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index labels of the rows whose occupation is the ' ?' placeholder.\n",
    "index_2 = df.index[df['occupation'] == ' ?'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "id": "5642c9f7-709f-4d52-b447-7d4798a24702",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label an unknown occupation as 'unemployed' only when the workclass says\n",
    "# the person has no paid job. The previous test\n",
    "# `workclass == ' Without-pay' or ' Never-worked'` was always truthy (a\n",
    "# non-empty string is True), so every unknown occupation was relabelled.\n",
    "# Rows with any other workclass keep ' ?' and need their own fill strategy.\n",
    "no_job_classes = [' Without-pay', ' Never-worked']\n",
    "occupation_unknown = df['occupation'] == ' ?'\n",
    "has_no_job = df['workclass'].isin(no_job_classes)\n",
    "df.loc[occupation_unknown & has_no_job, 'occupation'] = 'unemployed'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "f0d39115-c1cb-4f40-a771-18cbde6d0d52",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [age, workclass, fnlwgt, education, education-num, marital-status, occupation, relationship, race, sex, capital-gain, capital-loss, hours-per-week, native-country, Listing of attributes]\n",
       "Index: []"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show any rows whose occupation is still the ' ?' placeholder.\n",
    "df[df['occupation'] == ' ?']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d1b03e1e-713a-4a35-9a22-bf7c253a81d2",
   "metadata": {},
   "source": [
    "# 填补native-country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "id": "c698d073-96c4-4fb0-ae70-12efa97aa280",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index labels of the rows whose native-country is the ' ?' placeholder.\n",
    "index_3 = df.index[df['native-country'] == ' ?'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "id": "f22bd948-6ced-4ec4-9bc5-977422944cf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill unknown native-country values from race.\n",
    "# Every category string in this dataset carries a leading space\n",
    "# (e.g. ' Black', ' United-States'), so the comparisons must include it to\n",
    "# match at all, and the written values must include it so they merge with\n",
    "# the existing categories instead of creating near-duplicate ones.\n",
    "for k in index_3:\n",
    "    if df.loc[k,'race'] == ' Black':\n",
    "        df.loc[k,'native-country'] = ' India'\n",
    "    elif df.loc[k,'race'] == ' White':\n",
    "        df.loc[k,'native-country'] = ' United-States'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "id": "45ca8895-9889-45fd-b3c9-34bf5f0909a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>40.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>121772.0</td>\n",
       "      <td>Assoc-voc</td>\n",
       "      <td>11.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Craft-repair</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>30.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>117747.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1573.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>39.0</td>\n",
       "      <td>Federal-gov</td>\n",
       "      <td>157443.0</td>\n",
       "      <td>Masters</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>unemployed</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Female</td>\n",
       "      <td>3464.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>453</th>\n",
       "      <td>42.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>197583.0</td>\n",
       "      <td>Assoc-acdm</td>\n",
       "      <td>12.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1026</th>\n",
       "      <td>30.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>201624.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31701</th>\n",
       "      <td>36.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>137421.0</td>\n",
       "      <td>12th</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Transport-moving</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31796</th>\n",
       "      <td>31.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>190027.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Other-service</td>\n",
       "      <td>Other-relative</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32169</th>\n",
       "      <td>25.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>149943.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Other-service</td>\n",
       "      <td>Other-relative</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Male</td>\n",
       "      <td>4101.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32232</th>\n",
       "      <td>30.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>215441.0</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>Other</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32254</th>\n",
       "      <td>31.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>251659.0</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Other-service</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Asian-Pac-Islander</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1485.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>?</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>197 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age     workclass    fnlwgt      education  education-num  \\\n",
       "14     40.0       Private  121772.0      Assoc-voc           11.0   \n",
       "93     30.0       Private  117747.0        HS-grad            9.0   \n",
       "297    39.0   Federal-gov  157443.0        Masters           14.0   \n",
       "453    42.0       Private  197583.0     Assoc-acdm           12.0   \n",
       "1026   30.0       Private  201624.0      Bachelors           13.0   \n",
       "...     ...           ...       ...            ...            ...   \n",
       "31701  36.0       Private  137421.0           12th            8.0   \n",
       "31796  31.0       Private  190027.0        HS-grad            9.0   \n",
       "32169  25.0       Private  149943.0        HS-grad            9.0   \n",
       "32232  30.0       Private  215441.0   Some-college           10.0   \n",
       "32254  31.0       Private  251659.0   Some-college           10.0   \n",
       "\n",
       "            marital-status         occupation     relationship  \\\n",
       "14      Married-civ-spouse       Craft-repair          Husband   \n",
       "93      Married-civ-spouse              Sales             Wife   \n",
       "297     Married-civ-spouse         unemployed             Wife   \n",
       "453     Married-civ-spouse    Exec-managerial          Husband   \n",
       "1026         Never-married     Prof-specialty    Not-in-family   \n",
       "...                    ...                ...              ...   \n",
       "31701        Never-married   Transport-moving    Not-in-family   \n",
       "31796        Never-married      Other-service   Other-relative   \n",
       "32169        Never-married      Other-service   Other-relative   \n",
       "32232        Never-married       Adm-clerical    Not-in-family   \n",
       "32254   Married-civ-spouse      Other-service          Husband   \n",
       "\n",
       "                      race      sex  capital-gain  capital-loss  \\\n",
       "14      Asian-Pac-Islander     Male           0.0           0.0   \n",
       "93      Asian-Pac-Islander   Female           0.0        1573.0   \n",
       "297     Asian-Pac-Islander   Female        3464.0           0.0   \n",
       "453                  Black     Male           0.0           0.0   \n",
       "1026                 Black     Male           0.0           0.0   \n",
       "...                    ...      ...           ...           ...   \n",
       "31701   Asian-Pac-Islander     Male           0.0           0.0   \n",
       "31796                Black   Female           0.0           0.0   \n",
       "32169   Asian-Pac-Islander     Male        4101.0           0.0   \n",
       "32232                Other     Male           0.0           0.0   \n",
       "32254   Asian-Pac-Islander     Male           0.0        1485.0   \n",
       "\n",
       "       hours-per-week native-country Listing of attributes  \n",
       "14               40.0              ?                  >50K  \n",
       "93               35.0              ?                 <=50K  \n",
       "297              40.0              ?                 <=50K  \n",
       "453              40.0              ?                  >50K  \n",
       "1026             45.0              ?                 <=50K  \n",
       "...               ...            ...                   ...  \n",
       "31701            45.0              ?                 <=50K  \n",
       "31796            40.0              ?                 <=50K  \n",
       "32169            60.0              ?                 <=50K  \n",
       "32232            40.0              ?                 <=50K  \n",
       "32254            55.0              ?                  >50K  \n",
       "\n",
       "[197 rows x 15 columns]"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['native-country'] == ' ?']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "id": "28c0f566-89e3-4dea-bd08-abcf092fc966",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill the native-country values that are still unknown.\n",
    "# Rows already filled are skipped so this pass does not overwrite them\n",
    "# (previously every row in index_3 ended up as ' Haiti'). Category strings\n",
    "# keep the dataset's leading space so they match existing values.\n",
    "for k in index_3:\n",
    "    if df.loc[k,'native-country'] != ' ?':\n",
    "        continue\n",
    "    if df.loc[k,'race'] == ' Asian-Pac-Islander':\n",
    "        df.loc[k,'native-country'] = ' China'\n",
    "    else:\n",
    "        df.loc[k,'native-country'] = ' Haiti'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "id": "9920f2a9-0afd-413a-823a-cb0005dae7f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [age, workclass, fnlwgt, education, education-num, marital-status, occupation, relationship, race, sex, capital-gain, capital-loss, hours-per-week, native-country, Listing of attributes]\n",
       "Index: []"
      ]
     },
     "execution_count": 204,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['native-country'] == ' ?']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "id": "9034bbf3-b8fa-4736-8f41-c196883859ba",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39. 50. 38. 53. 28. 37. 49. 52. 31. 42. 30. 23. 32. 40. 34. 25. 43. 54.\n",
      " 35. 59. 56. 19. 20. 45. 22. 48. 21. 24. 57. 44. 41. 29. 18. 47. 46. 36.\n",
      " 79. 27. 67. 33. 76. 17. 55. 61. 70. 64. 71. 68. 66. 51. 58. 26. 60. 90.\n",
      " 75. 65. 77. 62. 63. 80. 72. 74. 69. 73. 81. 78. 88. 82. 83. 84. 85. 86.\n",
      " 87.]\n",
      "[' State-gov' ' Self-emp-not-inc' ' Private' ' Federal-gov' ' Local-gov'\n",
      " ' Self-emp-inc' ' Never-worked' ' Without-pay']\n",
      "[ 77516.  83311. 215646. ...  34066.  84661. 257302.]\n",
      "[' Bachelors' ' HS-grad' ' 11th' ' Masters' ' 9th' ' Some-college'\n",
      " ' Assoc-acdm' ' Assoc-voc' ' 7th-8th' ' Doctorate' ' Prof-school'\n",
      " ' 5th-6th' ' 10th' ' 1st-4th' ' Preschool' ' 12th']\n",
      "[13.  9.  7. 14.  5. 10. 12. 11.  4. 16. 15.  3.  6.  2.  1.  8.]\n",
      "[' Never-married' ' Married-civ-spouse' ' Divorced'\n",
      " ' Married-spouse-absent' ' Separated' ' Married-AF-spouse' ' Widowed']\n",
      "[' Adm-clerical' ' Exec-managerial' ' Handlers-cleaners' ' Prof-specialty'\n",
      " ' Other-service' ' Sales' ' Craft-repair' ' Transport-moving'\n",
      " ' Farming-fishing' ' Machine-op-inspct' ' Tech-support' 'unemployed'\n",
      " ' Protective-serv' ' Armed-Forces' ' Priv-house-serv']\n",
      "[' Not-in-family' ' Husband' ' Wife' ' Own-child' ' Unmarried'\n",
      " ' Other-relative']\n",
      "[' White' ' Black' ' Asian-Pac-Islander' ' Amer-Indian-Eskimo' ' Other']\n",
      "[' Male' ' Female']\n",
      "[ 2174.     0. 14084.  5178.  5013.  2407. 14344. 15024.  7688. 34095.\n",
      "  4064.  4386.  7298.  1409.  3674.  1055.  3464.  2050.  2176.   594.\n",
      " 20051.  6849.  4101.  1111.  8614.  3411.  2597. 25236.  4650.  9386.\n",
      "  2463.  3103. 10605.  2964.  3325.  2580.  3471.  4865. 99999.  6514.\n",
      "  1471.  2329.  2105.  2885. 25124. 10520.  2202.  2961. 27828.  6767.\n",
      "  2228.  1506. 13550.  2635.  5556.  4787.  3781.  3137.  3818.  3942.\n",
      "   914.   401.  2829.  2977.  4934.  2062.  2354.  5455. 15020.  1424.\n",
      "  3273. 22040.  4416.  3908. 10566.   991.  4931.  1086.  7430.  6497.\n",
      "   114.  7896.  2346.  3418.  3432.  2907.  1151.  2414.  2290. 15831.\n",
      " 41310.  4508.  2538.  3456.  6418.  1848.  3887.  5721.  9562.  1455.\n",
      "  2036.  1831. 11678.  2936.  2993.  7443.  6360.  1797.  1173.  4687.\n",
      "  6723.  2009.  6097.  2653.  1639. 18481.  7978.  2387.  5060.]\n",
      "[   0. 2042. 1408. 1902. 1573. 1887. 1719. 1762. 1564. 2179. 1816. 1980.\n",
      " 1977. 1876. 1340. 2206. 1741. 1485. 2339. 2415. 1380. 1721. 2051. 2377.\n",
      " 1669. 2352. 1672.  653. 2392. 1504. 2001. 1590. 1651. 1628. 1848. 1740.\n",
      " 2002. 1579. 2258. 1602.  419. 2547. 2174. 2205. 1726. 2444. 1138. 2238.\n",
      "  625.  213. 1539.  880. 1668. 1092. 1594. 3004. 2231. 1844.  810. 2824.\n",
      " 2559. 2057. 1974.  974. 2149. 1825. 1735. 1258. 2129. 2603. 2282.  323.\n",
      " 4356. 2246. 1617. 1648. 2489. 3770. 1755. 3683. 2267. 2080. 2457.  155.\n",
      " 3900. 2201. 1944. 2467. 2163. 2754. 2472. 1411.]\n",
      "[40. 13. 16. 45. 50. 80. 30. 35. 60. 20. 52. 44. 15. 25. 38. 43. 55. 48.\n",
      " 58. 32. 70.  2. 22. 56. 41. 28. 36. 24. 46. 42. 12. 65.  1. 10. 34. 75.\n",
      " 98. 33. 54.  8.  6. 64. 19. 18. 72.  5.  9. 47. 37. 21. 26. 14.  4. 59.\n",
      "  7. 99. 53. 39. 62. 57. 78. 90. 66. 11. 49. 84.  3. 17. 68. 27. 85. 31.\n",
      " 51. 77. 63. 23. 87. 88. 73. 89. 97. 94. 29. 96. 67. 82. 86. 91. 81. 76.\n",
      " 92. 61. 74. 95.]\n",
      "[' United-States' ' Cuba' ' Jamaica' ' India' ' Haiti' ' Mexico' ' South'\n",
      " ' Puerto-Rico' ' Honduras' ' England' ' Canada' ' Germany' ' Iran'\n",
      " ' Philippines' ' Italy' ' Poland' ' Columbia' ' Cambodia' ' Thailand'\n",
      " ' Ecuador' ' Laos' ' Taiwan' ' Portugal' ' Dominican-Republic'\n",
      " ' El-Salvador' ' France' ' Guatemala' ' China' ' Japan' ' Yugoslavia'\n",
      " ' Peru' ' Outlying-US(Guam-USVI-etc)' ' Scotland' ' Trinadad&Tobago'\n",
      " ' Greece' ' Nicaragua' ' Vietnam' ' Hong' ' Ireland' ' Hungary'\n",
      " ' Holand-Netherlands']\n",
      "[' <=50K' ' >50K']\n"
     ]
    }
   ],
   "source": [
    "# After filling: print the distinct values of every column again.\n",
    "for column_name in df.columns:\n",
    "    print(df[column_name].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "id": "175ef985-e91c-4ac3-b3db-24e9265f2ae2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>Listing of attributes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39.0</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>83311.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>215646.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>234721.0</td>\n",
       "      <td>11th</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>338409.0</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>Cuba</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32556</th>\n",
       "      <td>27.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>257302.0</td>\n",
       "      <td>Assoc-acdm</td>\n",
       "      <td>12.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Tech-support</td>\n",
       "      <td>Wife</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32557</th>\n",
       "      <td>40.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>154374.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Machine-op-inspct</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32558</th>\n",
       "      <td>58.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>151910.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Widowed</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32559</th>\n",
       "      <td>22.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>201490.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Own-child</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32560</th>\n",
       "      <td>52.0</td>\n",
       "      <td>Self-emp-inc</td>\n",
       "      <td>287927.0</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Wife</td>\n",
       "      <td>White</td>\n",
       "      <td>Female</td>\n",
       "      <td>15024.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&gt;50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>32561 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age          workclass    fnlwgt    education  education-num  \\\n",
       "0      39.0          State-gov   77516.0    Bachelors           13.0   \n",
       "1      50.0   Self-emp-not-inc   83311.0    Bachelors           13.0   \n",
       "2      38.0            Private  215646.0      HS-grad            9.0   \n",
       "3      53.0            Private  234721.0         11th            7.0   \n",
       "4      28.0            Private  338409.0    Bachelors           13.0   \n",
       "...     ...                ...       ...          ...            ...   \n",
       "32556  27.0            Private  257302.0   Assoc-acdm           12.0   \n",
       "32557  40.0            Private  154374.0      HS-grad            9.0   \n",
       "32558  58.0            Private  151910.0      HS-grad            9.0   \n",
       "32559  22.0            Private  201490.0      HS-grad            9.0   \n",
       "32560  52.0       Self-emp-inc  287927.0      HS-grad            9.0   \n",
       "\n",
       "            marital-status          occupation    relationship    race  \\\n",
       "0            Never-married        Adm-clerical   Not-in-family   White   \n",
       "1       Married-civ-spouse     Exec-managerial         Husband   White   \n",
       "2                 Divorced   Handlers-cleaners   Not-in-family   White   \n",
       "3       Married-civ-spouse   Handlers-cleaners         Husband   Black   \n",
       "4       Married-civ-spouse      Prof-specialty            Wife   Black   \n",
       "...                    ...                 ...             ...     ...   \n",
       "32556   Married-civ-spouse        Tech-support            Wife   White   \n",
       "32557   Married-civ-spouse   Machine-op-inspct         Husband   White   \n",
       "32558              Widowed        Adm-clerical       Unmarried   White   \n",
       "32559        Never-married        Adm-clerical       Own-child   White   \n",
       "32560   Married-civ-spouse     Exec-managerial            Wife   White   \n",
       "\n",
       "           sex  capital-gain  capital-loss  hours-per-week  native-country  \\\n",
       "0         Male        2174.0           0.0            40.0   United-States   \n",
       "1         Male           0.0           0.0            13.0   United-States   \n",
       "2         Male           0.0           0.0            40.0   United-States   \n",
       "3         Male           0.0           0.0            40.0   United-States   \n",
       "4       Female           0.0           0.0            40.0            Cuba   \n",
       "...        ...           ...           ...             ...             ...   \n",
       "32556   Female           0.0           0.0            38.0   United-States   \n",
       "32557     Male           0.0           0.0            40.0   United-States   \n",
       "32558   Female           0.0           0.0            40.0   United-States   \n",
       "32559     Male           0.0           0.0            20.0   United-States   \n",
       "32560   Female       15024.0           0.0            40.0   United-States   \n",
       "\n",
       "      Listing of attributes  \n",
       "0                     <=50K  \n",
       "1                     <=50K  \n",
       "2                     <=50K  \n",
       "3                     <=50K  \n",
       "4                     <=50K  \n",
       "...                     ...  \n",
       "32556                 <=50K  \n",
       "32557                  >50K  \n",
       "32558                 <=50K  \n",
       "32559                 <=50K  \n",
       "32560                  >50K  \n",
       "\n",
       "[32561 rows x 15 columns]"
      ]
     },
     "execution_count": 207,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "id": "9a77a865-8eea-4bea-ba57-e528ba846a56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write df to disk. index=False leaves the default 0..n-1 row index out of the\n",
    "# file, so reading it back does not add a spurious 'Unnamed: 0' column.\n",
    "# NOTE(review): the target is a hard-coded absolute path with no file extension.\n",
    "df.to_csv(r'C:\\Users\\96408\\Desktop\\金融专硕\\数据挖掘作业第二版', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d27553b6-93f1-4d31-ab9c-2bb98be81a1b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "244157d9-b435-4c5b-a4da-1129be608917",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58ea7957-648c-4db5-b315-6565be93f80b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
